{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Off-Policy VPG with Baseline to Play CartPole-v0\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.distributions as distributions\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:02:21 [INFO] env: <CartPoleEnv<CartPole-v0>>\n",
      "00:02:21 [INFO] action_space: Discrete(2)\n",
      "00:02:21 [INFO] observation_space: Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n",
      "00:02:21 [INFO] reward_range: (-inf, inf)\n",
      "00:02:21 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "00:02:21 [INFO] _max_episode_steps: 200\n",
      "00:02:21 [INFO] _elapsed_steps: None\n",
      "00:02:21 [INFO] id: CartPole-v0\n",
      "00:02:21 [INFO] entry_point: gym.envs.classic_control:CartPoleEnv\n",
      "00:02:21 [INFO] reward_threshold: 195.0\n",
      "00:02:21 [INFO] nondeterministic: False\n",
      "00:02:21 [INFO] max_episode_steps: 200\n",
      "00:02:21 [INFO] _kwargs: {}\n",
      "00:02:21 [INFO] _env_name: CartPole\n"
     ]
    }
   ],
   "source": [
    "# Create the seeded CartPole environment, then log every attribute of the\n",
    "# environment object followed by every attribute of its spec.\n",
    "env = gym.make('CartPole-v0')\n",
    "env.seed(0)\n",
    "for attributes in (vars(env), vars(env.spec)):\n",
    "    for key, value in attributes.items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class OffPolicyVPGwBaselineAgent:\n",
    "    \"\"\"Off-policy vanilla policy gradient (VPG) agent with a baseline.\n",
    "\n",
    "    In 'train' mode the agent acts with a uniform random behavior policy and\n",
    "    records the trajectory; `close()` then updates the baseline network and\n",
    "    the target policy network from that trajectory, using an importance\n",
    "    sampling ratio to correct for the behavior policy. In any other mode the\n",
    "    agent samples actions from the learned policy network.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env,):\n",
    "        \"\"\"Build the policy and baseline networks and their optimizers.\n",
    "\n",
    "        Args:\n",
    "            env: object exposing `action_space.n` and\n",
    "                `observation_space.shape` (e.g. a gym environment).\n",
    "        \"\"\"\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "\n",
    "        self.policy_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[],\n",
    "                output_size=self.action_n, output_activator=nn.Softmax(1))\n",
    "        self.policy_optimizer = optim.Adam(self.policy_net.parameters(), lr=0.06)\n",
    "        self.baseline_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[])\n",
    "        # The baseline optimizer must own the baseline parameters; pointing it\n",
    "        # at the policy parameters would leave the baseline untrained.\n",
    "        self.baseline_optimizer = optim.Adam(self.baseline_net.parameters(),\n",
    "                lr=0.1)\n",
    "        self.baseline_loss = nn.MSELoss()\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size=1,\n",
    "            output_activator=None, use_bias=False):\n",
    "        \"\"\"Build a fully connected network with ReLU between linear layers.\n",
    "\n",
    "        Args:\n",
    "            input_size: number of input features.\n",
    "            hidden_sizes: list of hidden layer widths (may be empty).\n",
    "            output_size: number of output features.\n",
    "            output_activator: optional module appended after the last layer.\n",
    "            use_bias: whether the linear layers have a bias term.\n",
    "\n",
    "        Returns:\n",
    "            An `nn.Sequential` model.\n",
    "        \"\"\"\n",
    "        in_sizes = [input_size,] + hidden_sizes\n",
    "        out_sizes = hidden_sizes + [output_size,]\n",
    "        layers = []\n",
    "        # Distinct loop names keep the arguments from being shadowed.\n",
    "        for in_size, out_size in zip(in_sizes, out_sizes):\n",
    "            layers.append(nn.Linear(in_size, out_size, bias=use_bias))\n",
    "            layers.append(nn.ReLU())\n",
    "        layers = layers[:-1]  # no ReLU after the final linear layer\n",
    "        if output_activator:\n",
    "            layers.append(output_activator)\n",
    "        model = nn.Sequential(*layers)\n",
    "        return model\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        \"\"\"Start an episode; in 'train' mode also clear the trajectory.\"\"\"\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Choose an action for `observation`.\n",
    "\n",
    "        In 'train' mode the action is uniformly random and the tuple\n",
    "        (observation, reward, done, action) is appended to the trajectory.\n",
    "        Otherwise the action is sampled from the policy network.\n",
    "        \"\"\"\n",
    "        if self.mode == 'train':\n",
    "            action = np.random.choice(self.action_n) # use random policy\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "        else:\n",
    "            state_tensor = torch.as_tensor(observation,\n",
    "                    dtype=torch.float).unsqueeze(0)\n",
    "            # Acting needs no gradients; skip building the autograd graph.\n",
    "            with torch.no_grad():\n",
    "                prob_tensor = self.policy_net(state_tensor)\n",
    "            action_tensor = distributions.Categorical(prob_tensor).sample()\n",
    "            action = action_tensor.numpy()[0]\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Finish an episode; in 'train' mode learn from the trajectory.\"\"\"\n",
    "        if self.mode == 'train':\n",
    "            self.learn()\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Update the baseline and the policy from the recorded trajectory.\n",
    "\n",
    "        The trajectory is a flat list laid out as repeated\n",
    "        (observation, reward, done, action) records. Does nothing when no\n",
    "        step has been recorded.\n",
    "        \"\"\"\n",
    "        if not self.trajectory:\n",
    "            return  # nothing recorded, nothing to learn from\n",
    "\n",
    "        # Stack observations into one array before the tensor conversion.\n",
    "        state_tensor = torch.as_tensor(np.asarray(self.trajectory[0::4]),\n",
    "                dtype=torch.float)\n",
    "        reward_tensor = torch.as_tensor(self.trajectory[1::4], dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(self.trajectory[3::4], dtype=torch.long)\n",
    "        arange_tensor = torch.arange(state_tensor.shape[0], dtype=torch.float)\n",
    "\n",
    "        # train baseline\n",
    "        discount_tensor = self.gamma ** arange_tensor\n",
    "        discounted_reward_tensor = discount_tensor * reward_tensor\n",
    "        # Reverse cumulative sum: entry t is sum_{k>=t} gamma^k * r_k.\n",
    "        discounted_return_tensor = discounted_reward_tensor.flip(\n",
    "                0).cumsum(0).flip(0)\n",
    "        return_tensor = discounted_return_tensor / discount_tensor\n",
    "        # Squeeze (T, 1) -> (T,): otherwise the products below broadcast to\n",
    "        # (T, T) and the baseline stops being state-dependent.\n",
    "        pred_tensor = self.baseline_net(state_tensor).squeeze(1)\n",
    "        psi_tensor = (discounted_return_tensor -\n",
    "                discount_tensor * pred_tensor).detach()\n",
    "        baseline_loss_tensor = self.baseline_loss(pred_tensor, return_tensor)\n",
    "        self.baseline_optimizer.zero_grad()\n",
    "        baseline_loss_tensor.backward()\n",
    "        self.baseline_optimizer.step()\n",
    "\n",
    "        # train policy\n",
    "        all_pi_tensor = self.policy_net(state_tensor)\n",
    "        pi_tensor = torch.gather(all_pi_tensor, 1,\n",
    "                action_tensor.unsqueeze(1)).squeeze(1)\n",
    "        # The behavior policy is uniform, so its probability is 1 / action_n.\n",
    "        behavior_prob = 1. / self.action_n\n",
    "        policy_loss_tensor = -(psi_tensor / behavior_prob * pi_tensor).mean()\n",
    "        self.policy_optimizer.zero_grad()\n",
    "        policy_loss_tensor.backward()\n",
    "        self.policy_optimizer.step()\n",
    "\n",
    "\n",
    "agent = OffPolicyVPGwBaselineAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:02:21 [INFO] ==== train ====\n",
      "00:02:21 [DEBUG] train episode 0: reward = 39.00, steps = 39\n",
      "00:02:21 [DEBUG] train episode 1: reward = 56.00, steps = 56\n",
      "00:02:21 [DEBUG] train episode 2: reward = 61.00, steps = 61\n",
      "00:02:21 [DEBUG] train episode 3: reward = 72.00, steps = 72\n",
      "00:02:21 [DEBUG] train episode 4: reward = 45.00, steps = 45\n",
      "00:02:21 [DEBUG] train episode 5: reward = 39.00, steps = 39\n",
      "00:02:21 [DEBUG] train episode 6: reward = 28.00, steps = 28\n",
      "00:02:21 [DEBUG] train episode 7: reward = 129.00, steps = 129\n",
      "00:02:21 [DEBUG] train episode 8: reward = 17.00, steps = 17\n",
      "00:02:21 [DEBUG] train episode 9: reward = 24.00, steps = 24\n",
      "00:02:21 [DEBUG] train episode 10: reward = 51.00, steps = 51\n",
      "00:02:21 [DEBUG] train episode 11: reward = 16.00, steps = 16\n",
      "00:02:21 [DEBUG] train episode 12: reward = 15.00, steps = 15\n",
      "00:02:22 [DEBUG] train episode 13: reward = 32.00, steps = 32\n",
      "00:02:22 [DEBUG] train episode 14: reward = 79.00, steps = 79\n",
      "00:02:22 [DEBUG] train episode 15: reward = 44.00, steps = 44\n",
      "00:02:22 [DEBUG] train episode 16: reward = 18.00, steps = 18\n",
      "00:02:22 [DEBUG] train episode 17: reward = 17.00, steps = 17\n",
      "00:02:22 [DEBUG] train episode 18: reward = 19.00, steps = 19\n",
      "00:02:22 [DEBUG] train episode 19: reward = 49.00, steps = 49\n",
      "00:02:22 [DEBUG] train episode 20: reward = 47.00, steps = 47\n",
      "00:02:22 [DEBUG] train episode 21: reward = 68.00, steps = 68\n",
      "00:02:22 [DEBUG] train episode 22: reward = 48.00, steps = 48\n",
      "00:02:22 [DEBUG] train episode 23: reward = 91.00, steps = 91\n",
      "00:02:22 [DEBUG] train episode 24: reward = 9.00, steps = 9\n",
      "00:02:22 [DEBUG] train episode 25: reward = 47.00, steps = 47\n",
      "00:02:22 [DEBUG] train episode 26: reward = 59.00, steps = 59\n",
      "00:02:22 [DEBUG] train episode 27: reward = 104.00, steps = 104\n",
      "00:02:22 [DEBUG] train episode 28: reward = 83.00, steps = 83\n",
      "00:02:22 [DEBUG] train episode 29: reward = 25.00, steps = 25\n",
      "00:02:22 [DEBUG] train episode 30: reward = 36.00, steps = 36\n",
      "00:02:22 [DEBUG] train episode 31: reward = 91.00, steps = 91\n",
      "00:02:22 [DEBUG] train episode 32: reward = 46.00, steps = 46\n",
      "00:02:22 [DEBUG] train episode 33: reward = 20.00, steps = 20\n",
      "00:02:22 [DEBUG] train episode 34: reward = 20.00, steps = 20\n",
      "00:02:22 [DEBUG] train episode 35: reward = 54.00, steps = 54\n",
      "00:02:22 [DEBUG] train episode 36: reward = 101.00, steps = 101\n",
      "00:02:22 [DEBUG] train episode 37: reward = 39.00, steps = 39\n",
      "00:02:22 [DEBUG] train episode 38: reward = 16.00, steps = 16\n",
      "00:02:22 [DEBUG] train episode 39: reward = 29.00, steps = 29\n",
      "00:02:22 [DEBUG] train episode 40: reward = 61.00, steps = 61\n",
      "00:02:22 [DEBUG] train episode 41: reward = 64.00, steps = 64\n",
      "00:02:22 [DEBUG] train episode 42: reward = 124.00, steps = 124\n",
      "00:02:22 [DEBUG] train episode 43: reward = 82.00, steps = 82\n",
      "00:02:22 [DEBUG] train episode 44: reward = 81.00, steps = 81\n",
      "00:02:22 [DEBUG] train episode 45: reward = 115.00, steps = 115\n",
      "00:02:22 [DEBUG] train episode 46: reward = 49.00, steps = 49\n",
      "00:02:22 [DEBUG] train episode 47: reward = 55.00, steps = 55\n",
      "00:02:22 [DEBUG] train episode 48: reward = 47.00, steps = 47\n",
      "00:02:22 [DEBUG] train episode 49: reward = 49.00, steps = 49\n",
      "00:02:22 [DEBUG] train episode 50: reward = 51.00, steps = 51\n",
      "00:02:22 [DEBUG] train episode 51: reward = 103.00, steps = 103\n",
      "00:02:22 [DEBUG] train episode 52: reward = 93.00, steps = 93\n",
      "00:02:22 [DEBUG] train episode 53: reward = 114.00, steps = 114\n",
      "00:02:22 [DEBUG] train episode 54: reward = 36.00, steps = 36\n",
      "00:02:22 [DEBUG] train episode 55: reward = 115.00, steps = 115\n",
      "00:02:22 [DEBUG] train episode 56: reward = 35.00, steps = 35\n",
      "00:02:22 [DEBUG] train episode 57: reward = 54.00, steps = 54\n",
      "00:02:22 [DEBUG] train episode 58: reward = 48.00, steps = 48\n",
      "00:02:22 [DEBUG] train episode 59: reward = 120.00, steps = 120\n",
      "00:02:23 [DEBUG] train episode 60: reward = 35.00, steps = 35\n",
      "00:02:23 [DEBUG] train episode 61: reward = 74.00, steps = 74\n",
      "00:02:23 [DEBUG] train episode 62: reward = 23.00, steps = 23\n",
      "00:02:23 [DEBUG] train episode 63: reward = 74.00, steps = 74\n",
      "00:02:23 [DEBUG] train episode 64: reward = 57.00, steps = 57\n",
      "00:02:23 [DEBUG] train episode 65: reward = 84.00, steps = 84\n",
      "00:02:23 [DEBUG] train episode 66: reward = 47.00, steps = 47\n",
      "00:02:23 [DEBUG] train episode 67: reward = 23.00, steps = 23\n",
      "00:02:23 [DEBUG] train episode 68: reward = 89.00, steps = 89\n",
      "00:02:23 [DEBUG] train episode 69: reward = 89.00, steps = 89\n",
      "00:02:23 [DEBUG] train episode 70: reward = 34.00, steps = 34\n",
      "00:02:23 [DEBUG] train episode 71: reward = 85.00, steps = 85\n",
      "00:02:23 [DEBUG] train episode 72: reward = 34.00, steps = 34\n",
      "00:02:23 [DEBUG] train episode 73: reward = 74.00, steps = 74\n",
      "00:02:23 [DEBUG] train episode 74: reward = 48.00, steps = 48\n",
      "00:02:23 [DEBUG] train episode 75: reward = 41.00, steps = 41\n",
      "00:02:23 [DEBUG] train episode 76: reward = 48.00, steps = 48\n",
      "00:02:23 [DEBUG] train episode 77: reward = 62.00, steps = 62\n",
      "00:02:23 [DEBUG] train episode 78: reward = 37.00, steps = 37\n",
      "00:02:23 [DEBUG] train episode 79: reward = 54.00, steps = 54\n",
      "00:02:23 [DEBUG] train episode 80: reward = 76.00, steps = 76\n",
      "00:02:23 [DEBUG] train episode 81: reward = 68.00, steps = 68\n",
      "00:02:23 [DEBUG] train episode 82: reward = 94.00, steps = 94\n",
      "00:02:23 [DEBUG] train episode 83: reward = 61.00, steps = 61\n",
      "00:02:23 [DEBUG] train episode 84: reward = 69.00, steps = 69\n",
      "00:02:23 [DEBUG] train episode 85: reward = 41.00, steps = 41\n",
      "00:02:23 [DEBUG] train episode 86: reward = 26.00, steps = 26\n",
      "00:02:23 [DEBUG] train episode 87: reward = 63.00, steps = 63\n",
      "00:02:23 [DEBUG] train episode 88: reward = 42.00, steps = 42\n",
      "00:02:23 [DEBUG] train episode 89: reward = 25.00, steps = 25\n",
      "00:02:23 [DEBUG] train episode 90: reward = 45.00, steps = 45\n",
      "00:02:23 [DEBUG] train episode 91: reward = 82.00, steps = 82\n",
      "00:02:23 [DEBUG] train episode 92: reward = 67.00, steps = 67\n",
      "00:02:23 [DEBUG] train episode 93: reward = 59.00, steps = 59\n",
      "00:02:23 [DEBUG] train episode 94: reward = 62.00, steps = 62\n",
      "00:02:23 [DEBUG] train episode 95: reward = 49.00, steps = 49\n",
      "00:02:23 [DEBUG] train episode 96: reward = 55.00, steps = 55\n",
      "00:02:23 [DEBUG] train episode 97: reward = 145.00, steps = 145\n",
      "00:02:23 [DEBUG] train episode 98: reward = 45.00, steps = 45\n",
      "00:02:23 [DEBUG] train episode 99: reward = 57.00, steps = 57\n",
      "00:02:23 [DEBUG] train episode 100: reward = 35.00, steps = 35\n",
      "00:02:23 [DEBUG] train episode 101: reward = 140.00, steps = 140\n",
      "00:02:23 [DEBUG] train episode 102: reward = 55.00, steps = 55\n",
      "00:02:23 [DEBUG] train episode 103: reward = 57.00, steps = 57\n",
      "00:02:23 [DEBUG] train episode 104: reward = 33.00, steps = 33\n",
      "00:02:23 [DEBUG] train episode 105: reward = 72.00, steps = 72\n",
      "00:02:23 [DEBUG] train episode 106: reward = 86.00, steps = 86\n",
      "00:02:24 [DEBUG] train episode 107: reward = 61.00, steps = 61\n",
      "00:02:24 [DEBUG] train episode 108: reward = 89.00, steps = 89\n",
      "00:02:24 [DEBUG] train episode 109: reward = 55.00, steps = 55\n",
      "00:02:24 [DEBUG] train episode 110: reward = 65.00, steps = 65\n",
      "00:02:24 [DEBUG] train episode 111: reward = 37.00, steps = 37\n",
      "00:02:24 [DEBUG] train episode 112: reward = 62.00, steps = 62\n",
      "00:02:24 [DEBUG] train episode 113: reward = 57.00, steps = 57\n",
      "00:02:24 [DEBUG] train episode 114: reward = 50.00, steps = 50\n",
      "00:02:24 [DEBUG] train episode 115: reward = 62.00, steps = 62\n",
      "00:02:24 [DEBUG] train episode 116: reward = 83.00, steps = 83\n",
      "00:02:24 [DEBUG] train episode 117: reward = 79.00, steps = 79\n",
      "00:02:24 [DEBUG] train episode 118: reward = 41.00, steps = 41\n",
      "00:02:24 [DEBUG] train episode 119: reward = 48.00, steps = 48\n",
      "00:02:24 [DEBUG] train episode 120: reward = 55.00, steps = 55\n",
      "00:02:24 [DEBUG] train episode 121: reward = 63.00, steps = 63\n",
      "00:02:24 [DEBUG] train episode 122: reward = 53.00, steps = 53\n",
      "00:02:24 [DEBUG] train episode 123: reward = 83.00, steps = 83\n",
      "00:02:24 [DEBUG] train episode 124: reward = 108.00, steps = 108\n",
      "00:02:24 [DEBUG] train episode 125: reward = 39.00, steps = 39\n",
      "00:02:24 [DEBUG] train episode 126: reward = 59.00, steps = 59\n",
      "00:02:24 [DEBUG] train episode 127: reward = 48.00, steps = 48\n",
      "00:02:24 [DEBUG] train episode 128: reward = 80.00, steps = 80\n",
      "00:02:24 [DEBUG] train episode 129: reward = 84.00, steps = 84\n",
      "00:02:24 [DEBUG] train episode 130: reward = 85.00, steps = 85\n",
      "00:02:24 [DEBUG] train episode 131: reward = 78.00, steps = 78\n",
      "00:02:24 [DEBUG] train episode 132: reward = 59.00, steps = 59\n",
      "00:02:24 [DEBUG] train episode 133: reward = 56.00, steps = 56\n",
      "00:02:24 [DEBUG] train episode 134: reward = 65.00, steps = 65\n",
      "00:02:24 [DEBUG] train episode 135: reward = 36.00, steps = 36\n",
      "00:02:24 [DEBUG] train episode 136: reward = 51.00, steps = 51\n",
      "00:02:24 [DEBUG] train episode 137: reward = 34.00, steps = 34\n",
      "00:02:24 [DEBUG] train episode 138: reward = 67.00, steps = 67\n",
      "00:02:24 [DEBUG] train episode 139: reward = 59.00, steps = 59\n",
      "00:02:24 [DEBUG] train episode 140: reward = 43.00, steps = 43\n",
      "00:02:24 [DEBUG] train episode 141: reward = 101.00, steps = 101\n",
      "00:02:24 [DEBUG] train episode 142: reward = 74.00, steps = 74\n",
      "00:02:24 [DEBUG] train episode 143: reward = 69.00, steps = 69\n",
      "00:02:24 [DEBUG] train episode 144: reward = 48.00, steps = 48\n",
      "00:02:24 [DEBUG] train episode 145: reward = 55.00, steps = 55\n",
      "00:02:24 [DEBUG] train episode 146: reward = 63.00, steps = 63\n",
      "00:02:24 [DEBUG] train episode 147: reward = 60.00, steps = 60\n",
      "00:02:24 [DEBUG] train episode 148: reward = 108.00, steps = 108\n",
      "00:02:24 [DEBUG] train episode 149: reward = 56.00, steps = 56\n",
      "00:02:24 [DEBUG] train episode 150: reward = 51.00, steps = 51\n",
      "00:02:24 [DEBUG] train episode 151: reward = 70.00, steps = 70\n",
      "00:02:25 [DEBUG] train episode 152: reward = 52.00, steps = 52\n",
      "00:02:25 [DEBUG] train episode 153: reward = 33.00, steps = 33\n",
      "00:02:25 [DEBUG] train episode 154: reward = 48.00, steps = 48\n",
      "00:02:25 [DEBUG] train episode 155: reward = 145.00, steps = 145\n",
      "00:02:25 [DEBUG] train episode 156: reward = 75.00, steps = 75\n",
      "00:02:25 [DEBUG] train episode 157: reward = 48.00, steps = 48\n",
      "00:02:25 [DEBUG] train episode 158: reward = 43.00, steps = 43\n",
      "00:02:25 [DEBUG] train episode 159: reward = 54.00, steps = 54\n",
      "00:02:25 [DEBUG] train episode 160: reward = 119.00, steps = 119\n",
      "00:02:25 [DEBUG] train episode 161: reward = 69.00, steps = 69\n",
      "00:02:25 [DEBUG] train episode 162: reward = 97.00, steps = 97\n",
      "00:02:25 [DEBUG] train episode 163: reward = 63.00, steps = 63\n",
      "00:02:25 [DEBUG] train episode 164: reward = 64.00, steps = 64\n",
      "00:02:25 [DEBUG] train episode 165: reward = 63.00, steps = 63\n",
      "00:02:25 [DEBUG] train episode 166: reward = 44.00, steps = 44\n",
      "00:02:25 [DEBUG] train episode 167: reward = 61.00, steps = 61\n",
      "00:02:25 [DEBUG] train episode 168: reward = 74.00, steps = 74\n",
      "00:02:25 [DEBUG] train episode 169: reward = 68.00, steps = 68\n",
      "00:02:25 [DEBUG] train episode 170: reward = 43.00, steps = 43\n",
      "00:02:25 [DEBUG] train episode 171: reward = 51.00, steps = 51\n",
      "00:02:25 [DEBUG] train episode 172: reward = 51.00, steps = 51\n",
      "00:02:25 [DEBUG] train episode 173: reward = 33.00, steps = 33\n",
      "00:02:25 [DEBUG] train episode 174: reward = 31.00, steps = 31\n",
      "00:02:25 [DEBUG] train episode 175: reward = 49.00, steps = 49\n",
      "00:02:25 [DEBUG] train episode 176: reward = 85.00, steps = 85\n",
      "00:02:25 [DEBUG] train episode 177: reward = 57.00, steps = 57\n",
      "00:02:25 [DEBUG] train episode 178: reward = 54.00, steps = 54\n",
      "00:02:25 [DEBUG] train episode 179: reward = 90.00, steps = 90\n",
      "00:02:25 [DEBUG] train episode 180: reward = 44.00, steps = 44\n",
      "00:02:25 [DEBUG] train episode 181: reward = 68.00, steps = 68\n",
      "00:02:25 [DEBUG] train episode 182: reward = 103.00, steps = 103\n",
      "00:02:25 [DEBUG] train episode 183: reward = 78.00, steps = 78\n",
      "00:02:25 [DEBUG] train episode 184: reward = 71.00, steps = 71\n",
      "00:02:25 [DEBUG] train episode 185: reward = 43.00, steps = 43\n",
      "00:02:25 [DEBUG] train episode 186: reward = 102.00, steps = 102\n",
      "00:02:25 [DEBUG] train episode 187: reward = 57.00, steps = 57\n",
      "00:02:25 [DEBUG] train episode 188: reward = 98.00, steps = 98\n",
      "00:02:25 [DEBUG] train episode 189: reward = 68.00, steps = 68\n",
      "00:02:25 [DEBUG] train episode 190: reward = 62.00, steps = 62\n",
      "00:02:25 [DEBUG] train episode 191: reward = 39.00, steps = 39\n",
      "00:02:25 [DEBUG] train episode 192: reward = 64.00, steps = 64\n",
      "00:02:25 [DEBUG] train episode 193: reward = 130.00, steps = 130\n",
      "00:02:25 [DEBUG] train episode 194: reward = 108.00, steps = 108\n",
      "00:02:25 [DEBUG] train episode 195: reward = 59.00, steps = 59\n",
      "00:02:26 [DEBUG] train episode 196: reward = 50.00, steps = 50\n",
      "00:02:26 [DEBUG] train episode 197: reward = 58.00, steps = 58\n",
      "00:02:26 [DEBUG] train episode 198: reward = 59.00, steps = 59\n",
      "00:02:26 [DEBUG] train episode 199: reward = 60.00, steps = 60\n",
      "00:02:26 [DEBUG] train episode 200: reward = 58.00, steps = 58\n",
      "00:02:26 [DEBUG] train episode 201: reward = 22.00, steps = 22\n",
      "00:02:26 [DEBUG] train episode 202: reward = 49.00, steps = 49\n",
      "00:02:26 [DEBUG] train episode 203: reward = 73.00, steps = 73\n",
      "00:02:26 [DEBUG] train episode 204: reward = 63.00, steps = 63\n",
      "00:02:26 [DEBUG] train episode 205: reward = 65.00, steps = 65\n",
      "00:02:26 [DEBUG] train episode 206: reward = 22.00, steps = 22\n",
      "00:02:26 [DEBUG] train episode 207: reward = 71.00, steps = 71\n",
      "00:02:26 [DEBUG] train episode 208: reward = 72.00, steps = 72\n",
      "00:02:26 [DEBUG] train episode 209: reward = 61.00, steps = 61\n",
      "00:02:26 [DEBUG] train episode 210: reward = 44.00, steps = 44\n",
      "00:02:26 [DEBUG] train episode 211: reward = 47.00, steps = 47\n",
      "00:02:26 [DEBUG] train episode 212: reward = 57.00, steps = 57\n",
      "00:02:26 [DEBUG] train episode 213: reward = 109.00, steps = 109\n",
      "00:02:26 [DEBUG] train episode 214: reward = 61.00, steps = 61\n",
      "00:02:26 [DEBUG] train episode 215: reward = 170.00, steps = 170\n",
      "00:02:26 [DEBUG] train episode 216: reward = 51.00, steps = 51\n",
      "00:02:26 [DEBUG] train episode 217: reward = 71.00, steps = 71\n",
      "00:02:26 [DEBUG] train episode 218: reward = 57.00, steps = 57\n",
      "00:02:26 [DEBUG] train episode 219: reward = 56.00, steps = 56\n",
      "00:02:26 [DEBUG] train episode 220: reward = 38.00, steps = 38\n",
      "00:02:26 [DEBUG] train episode 221: reward = 39.00, steps = 39\n",
      "00:02:26 [DEBUG] train episode 222: reward = 163.00, steps = 163\n",
      "00:02:26 [DEBUG] train episode 223: reward = 61.00, steps = 61\n",
      "00:02:26 [DEBUG] train episode 224: reward = 15.00, steps = 15\n",
      "00:02:26 [DEBUG] train episode 225: reward = 33.00, steps = 33\n",
      "00:02:26 [DEBUG] train episode 226: reward = 34.00, steps = 34\n",
      "00:02:26 [DEBUG] train episode 227: reward = 89.00, steps = 89\n",
      "00:02:26 [DEBUG] train episode 228: reward = 49.00, steps = 49\n",
      "00:02:26 [DEBUG] train episode 229: reward = 37.00, steps = 37\n",
      "00:02:26 [DEBUG] train episode 230: reward = 64.00, steps = 64\n",
      "00:02:26 [DEBUG] train episode 231: reward = 55.00, steps = 55\n",
      "00:02:26 [DEBUG] train episode 232: reward = 66.00, steps = 66\n",
      "00:02:26 [DEBUG] train episode 233: reward = 36.00, steps = 36\n",
      "00:02:26 [DEBUG] train episode 234: reward = 64.00, steps = 64\n",
      "00:02:26 [DEBUG] train episode 235: reward = 59.00, steps = 59\n",
      "00:02:26 [DEBUG] train episode 236: reward = 72.00, steps = 72\n",
      "00:02:26 [DEBUG] train episode 237: reward = 53.00, steps = 53\n",
      "00:02:26 [DEBUG] train episode 238: reward = 72.00, steps = 72\n",
      "00:02:26 [DEBUG] train episode 239: reward = 49.00, steps = 49\n",
      "00:02:26 [DEBUG] train episode 240: reward = 41.00, steps = 41\n",
      "00:02:26 [DEBUG] train episode 241: reward = 119.00, steps = 119\n",
      "00:02:26 [DEBUG] train episode 242: reward = 69.00, steps = 69\n",
      "00:02:27 [DEBUG] train episode 243: reward = 78.00, steps = 78\n",
      "00:02:27 [DEBUG] train episode 244: reward = 69.00, steps = 69\n",
      "00:02:27 [DEBUG] train episode 245: reward = 91.00, steps = 91\n",
      "00:02:27 [DEBUG] train episode 246: reward = 42.00, steps = 42\n",
      "00:02:27 [DEBUG] train episode 247: reward = 104.00, steps = 104\n",
      "00:02:27 [DEBUG] train episode 248: reward = 37.00, steps = 37\n",
      "00:02:27 [DEBUG] train episode 249: reward = 86.00, steps = 86\n",
      "00:02:27 [DEBUG] train episode 250: reward = 66.00, steps = 66\n",
      "00:02:27 [DEBUG] train episode 251: reward = 97.00, steps = 97\n",
      "00:02:27 [DEBUG] train episode 252: reward = 41.00, steps = 41\n",
      "00:02:27 [DEBUG] train episode 253: reward = 70.00, steps = 70\n",
      "00:02:27 [DEBUG] train episode 254: reward = 64.00, steps = 64\n",
      "00:02:27 [DEBUG] train episode 255: reward = 76.00, steps = 76\n",
      "00:02:27 [DEBUG] train episode 256: reward = 43.00, steps = 43\n",
      "00:02:27 [DEBUG] train episode 257: reward = 49.00, steps = 49\n",
      "00:02:27 [DEBUG] train episode 258: reward = 64.00, steps = 64\n",
      "00:02:27 [DEBUG] train episode 259: reward = 29.00, steps = 29\n",
      "00:02:27 [DEBUG] train episode 260: reward = 75.00, steps = 75\n",
      "00:02:27 [DEBUG] train episode 261: reward = 62.00, steps = 62\n",
      "00:02:27 [DEBUG] train episode 262: reward = 53.00, steps = 53\n",
      "00:02:27 [DEBUG] train episode 263: reward = 56.00, steps = 56\n",
      "00:02:27 [DEBUG] train episode 264: reward = 74.00, steps = 74\n",
      "00:02:27 [DEBUG] train episode 265: reward = 75.00, steps = 75\n",
      "00:02:27 [DEBUG] train episode 266: reward = 54.00, steps = 54\n",
      "00:02:27 [DEBUG] train episode 267: reward = 44.00, steps = 44\n",
      "00:02:27 [DEBUG] train episode 268: reward = 62.00, steps = 62\n",
      "00:02:27 [DEBUG] train episode 269: reward = 32.00, steps = 32\n",
      "00:02:27 [DEBUG] train episode 270: reward = 175.00, steps = 175\n",
      "00:02:27 [DEBUG] train episode 271: reward = 53.00, steps = 53\n",
      "00:02:27 [DEBUG] train episode 272: reward = 43.00, steps = 43\n",
      "00:02:27 [DEBUG] train episode 273: reward = 44.00, steps = 44\n",
      "00:02:27 [DEBUG] train episode 274: reward = 52.00, steps = 52\n",
      "00:02:27 [DEBUG] train episode 275: reward = 53.00, steps = 53\n",
      "00:02:27 [DEBUG] train episode 276: reward = 77.00, steps = 77\n",
      "00:02:27 [DEBUG] train episode 277: reward = 94.00, steps = 94\n",
      "00:02:27 [DEBUG] train episode 278: reward = 86.00, steps = 86\n",
      "00:02:27 [DEBUG] train episode 279: reward = 94.00, steps = 94\n",
      "00:02:27 [DEBUG] train episode 280: reward = 129.00, steps = 129\n",
      "00:02:27 [DEBUG] train episode 281: reward = 99.00, steps = 99\n",
      "00:02:27 [DEBUG] train episode 282: reward = 37.00, steps = 37\n",
      "00:02:27 [DEBUG] train episode 283: reward = 50.00, steps = 50\n",
      "00:02:27 [DEBUG] train episode 284: reward = 50.00, steps = 50\n",
      "00:02:27 [DEBUG] train episode 285: reward = 139.00, steps = 139\n",
      "00:02:27 [DEBUG] train episode 286: reward = 52.00, steps = 52\n",
      "00:02:28 [DEBUG] train episode 287: reward = 51.00, steps = 51\n",
      "00:02:28 [DEBUG] train episode 288: reward = 68.00, steps = 68\n",
      "00:02:28 [DEBUG] train episode 289: reward = 46.00, steps = 46\n",
      "00:02:28 [DEBUG] train episode 290: reward = 77.00, steps = 77\n",
      "00:02:28 [DEBUG] train episode 291: reward = 65.00, steps = 65\n",
      "00:02:28 [DEBUG] train episode 292: reward = 45.00, steps = 45\n",
      "00:02:28 [DEBUG] train episode 293: reward = 91.00, steps = 91\n",
      "00:02:28 [DEBUG] train episode 294: reward = 48.00, steps = 48\n",
      "00:02:28 [DEBUG] train episode 295: reward = 78.00, steps = 78\n",
      "00:02:28 [DEBUG] train episode 296: reward = 56.00, steps = 56\n",
      "00:02:28 [DEBUG] train episode 297: reward = 80.00, steps = 80\n",
      "00:02:28 [DEBUG] train episode 298: reward = 67.00, steps = 67\n",
      "00:02:28 [DEBUG] train episode 299: reward = 92.00, steps = 92\n",
      "00:02:28 [DEBUG] train episode 300: reward = 61.00, steps = 61\n",
      "00:02:28 [DEBUG] train episode 301: reward = 54.00, steps = 54\n",
      "00:02:28 [DEBUG] train episode 302: reward = 62.00, steps = 62\n",
      "00:02:28 [DEBUG] train episode 303: reward = 64.00, steps = 64\n",
      "00:02:28 [DEBUG] train episode 304: reward = 55.00, steps = 55\n",
      "00:02:28 [DEBUG] train episode 305: reward = 64.00, steps = 64\n",
      "00:02:28 [DEBUG] train episode 306: reward = 63.00, steps = 63\n",
      "00:02:28 [DEBUG] train episode 307: reward = 66.00, steps = 66\n",
      "00:02:28 [DEBUG] train episode 308: reward = 44.00, steps = 44\n",
      "00:02:28 [DEBUG] train episode 309: reward = 66.00, steps = 66\n",
      "00:02:28 [DEBUG] train episode 310: reward = 59.00, steps = 59\n",
      "00:02:28 [DEBUG] train episode 311: reward = 31.00, steps = 31\n",
      "00:02:28 [DEBUG] train episode 312: reward = 54.00, steps = 54\n",
      "00:02:28 [DEBUG] train episode 313: reward = 71.00, steps = 71\n",
      "00:02:28 [DEBUG] train episode 314: reward = 53.00, steps = 53\n",
      "00:02:28 [DEBUG] train episode 315: reward = 48.00, steps = 48\n",
      "00:02:28 [DEBUG] train episode 316: reward = 78.00, steps = 78\n",
      "00:02:28 [DEBUG] train episode 317: reward = 53.00, steps = 53\n",
      "00:02:28 [DEBUG] train episode 318: reward = 47.00, steps = 47\n",
      "00:02:28 [DEBUG] train episode 319: reward = 62.00, steps = 62\n",
      "00:02:28 [DEBUG] train episode 320: reward = 44.00, steps = 44\n",
      "00:02:28 [DEBUG] train episode 321: reward = 40.00, steps = 40\n",
      "00:02:28 [DEBUG] train episode 322: reward = 23.00, steps = 23\n",
      "00:02:28 [DEBUG] train episode 323: reward = 61.00, steps = 61\n",
      "00:02:28 [DEBUG] train episode 324: reward = 40.00, steps = 40\n",
      "00:02:28 [DEBUG] train episode 325: reward = 65.00, steps = 65\n",
      "00:02:28 [DEBUG] train episode 326: reward = 46.00, steps = 46\n",
      "00:02:28 [DEBUG] train episode 327: reward = 63.00, steps = 63\n",
      "00:02:28 [DEBUG] train episode 328: reward = 56.00, steps = 56\n",
      "00:02:28 [DEBUG] train episode 329: reward = 74.00, steps = 74\n",
      "00:02:28 [DEBUG] train episode 330: reward = 47.00, steps = 47\n",
      "00:02:28 [DEBUG] train episode 331: reward = 77.00, steps = 77\n",
      "00:02:28 [DEBUG] train episode 332: reward = 61.00, steps = 61\n",
      "00:02:28 [DEBUG] train episode 333: reward = 65.00, steps = 65\n",
      "00:02:28 [DEBUG] train episode 334: reward = 81.00, steps = 81\n",
      "00:02:29 [DEBUG] train episode 335: reward = 53.00, steps = 53\n",
      "00:02:29 [DEBUG] train episode 336: reward = 43.00, steps = 43\n",
      "00:02:29 [DEBUG] train episode 337: reward = 80.00, steps = 80\n",
      "00:02:29 [DEBUG] train episode 338: reward = 48.00, steps = 48\n",
      "00:02:29 [DEBUG] train episode 339: reward = 53.00, steps = 53\n",
      "00:02:29 [DEBUG] train episode 340: reward = 74.00, steps = 74\n",
      "00:02:29 [DEBUG] train episode 341: reward = 82.00, steps = 82\n",
      "00:02:29 [DEBUG] train episode 342: reward = 93.00, steps = 93\n",
      "00:02:29 [DEBUG] train episode 343: reward = 57.00, steps = 57\n",
      "00:02:29 [DEBUG] train episode 344: reward = 62.00, steps = 62\n",
      "00:02:29 [DEBUG] train episode 345: reward = 62.00, steps = 62\n",
      "00:02:29 [DEBUG] train episode 346: reward = 59.00, steps = 59\n",
      "00:02:29 [DEBUG] train episode 347: reward = 80.00, steps = 80\n",
      "00:02:29 [DEBUG] train episode 348: reward = 71.00, steps = 71\n",
      "00:02:29 [DEBUG] train episode 349: reward = 90.00, steps = 90\n",
      "00:02:29 [DEBUG] train episode 350: reward = 54.00, steps = 54\n",
      "00:02:29 [DEBUG] train episode 351: reward = 60.00, steps = 60\n",
      "00:02:29 [DEBUG] train episode 352: reward = 45.00, steps = 45\n",
      "00:02:29 [DEBUG] train episode 353: reward = 126.00, steps = 126\n",
      "00:02:29 [DEBUG] train episode 354: reward = 66.00, steps = 66\n",
      "00:02:29 [DEBUG] train episode 355: reward = 53.00, steps = 53\n",
      "00:02:29 [DEBUG] train episode 356: reward = 67.00, steps = 67\n",
      "00:02:29 [DEBUG] train episode 357: reward = 42.00, steps = 42\n",
      "00:02:29 [DEBUG] train episode 358: reward = 119.00, steps = 119\n",
      "00:02:29 [DEBUG] train episode 359: reward = 52.00, steps = 52\n",
      "00:02:29 [DEBUG] train episode 360: reward = 45.00, steps = 45\n",
      "00:02:29 [DEBUG] train episode 361: reward = 46.00, steps = 46\n",
      "00:02:29 [DEBUG] train episode 362: reward = 52.00, steps = 52\n",
      "00:02:29 [DEBUG] train episode 363: reward = 41.00, steps = 41\n",
      "00:02:29 [DEBUG] train episode 364: reward = 58.00, steps = 58\n",
      "00:02:29 [DEBUG] train episode 365: reward = 79.00, steps = 79\n",
      "00:02:29 [DEBUG] train episode 366: reward = 66.00, steps = 66\n",
      "00:02:29 [DEBUG] train episode 367: reward = 53.00, steps = 53\n",
      "00:02:29 [DEBUG] train episode 368: reward = 61.00, steps = 61\n",
      "00:02:29 [DEBUG] train episode 369: reward = 36.00, steps = 36\n",
      "00:02:29 [DEBUG] train episode 370: reward = 71.00, steps = 71\n",
      "00:02:29 [DEBUG] train episode 371: reward = 45.00, steps = 45\n",
      "00:02:29 [DEBUG] train episode 372: reward = 33.00, steps = 33\n",
      "00:02:29 [DEBUG] train episode 373: reward = 90.00, steps = 90\n",
      "00:02:29 [DEBUG] train episode 374: reward = 51.00, steps = 51\n",
      "00:02:29 [DEBUG] train episode 375: reward = 55.00, steps = 55\n",
      "00:02:29 [DEBUG] train episode 376: reward = 112.00, steps = 112\n",
      "00:02:29 [DEBUG] train episode 377: reward = 104.00, steps = 104\n",
      "00:02:29 [DEBUG] train episode 378: reward = 63.00, steps = 63\n",
      "00:02:29 [DEBUG] train episode 379: reward = 90.00, steps = 90\n",
      "00:02:30 [DEBUG] train episode 380: reward = 30.00, steps = 30\n",
      "00:02:30 [DEBUG] train episode 381: reward = 68.00, steps = 68\n",
      "00:02:30 [DEBUG] train episode 382: reward = 47.00, steps = 47\n",
      "00:02:30 [DEBUG] train episode 383: reward = 71.00, steps = 71\n",
      "00:02:30 [DEBUG] train episode 384: reward = 59.00, steps = 59\n",
      "00:02:30 [DEBUG] train episode 385: reward = 121.00, steps = 121\n",
      "00:02:30 [DEBUG] train episode 386: reward = 45.00, steps = 45\n",
      "00:02:30 [DEBUG] train episode 387: reward = 57.00, steps = 57\n",
      "00:02:30 [DEBUG] train episode 388: reward = 88.00, steps = 88\n",
      "00:02:30 [DEBUG] train episode 389: reward = 84.00, steps = 84\n",
      "00:02:30 [DEBUG] train episode 390: reward = 29.00, steps = 29\n",
      "00:02:30 [DEBUG] train episode 391: reward = 77.00, steps = 77\n",
      "00:02:30 [DEBUG] train episode 392: reward = 45.00, steps = 45\n",
      "00:02:30 [DEBUG] train episode 393: reward = 46.00, steps = 46\n",
      "00:02:30 [DEBUG] train episode 394: reward = 101.00, steps = 101\n",
      "00:02:30 [DEBUG] train episode 395: reward = 37.00, steps = 37\n",
      "00:02:30 [DEBUG] train episode 396: reward = 80.00, steps = 80\n",
      "00:02:30 [DEBUG] train episode 397: reward = 94.00, steps = 94\n",
      "00:02:30 [DEBUG] train episode 398: reward = 80.00, steps = 80\n",
      "00:02:30 [DEBUG] train episode 399: reward = 63.00, steps = 63\n",
      "00:02:30 [DEBUG] train episode 400: reward = 33.00, steps = 33\n",
      "00:02:30 [DEBUG] train episode 401: reward = 85.00, steps = 85\n",
      "00:02:30 [DEBUG] train episode 402: reward = 47.00, steps = 47\n",
      "00:02:30 [DEBUG] train episode 403: reward = 47.00, steps = 47\n",
      "00:02:30 [DEBUG] train episode 404: reward = 113.00, steps = 113\n",
      "00:02:30 [DEBUG] train episode 405: reward = 52.00, steps = 52\n",
      "00:02:30 [DEBUG] train episode 406: reward = 50.00, steps = 50\n",
      "00:02:30 [DEBUG] train episode 407: reward = 42.00, steps = 42\n",
      "00:02:30 [DEBUG] train episode 408: reward = 49.00, steps = 49\n",
      "00:02:30 [DEBUG] train episode 409: reward = 67.00, steps = 67\n",
      "00:02:30 [DEBUG] train episode 410: reward = 99.00, steps = 99\n",
      "00:02:30 [DEBUG] train episode 411: reward = 73.00, steps = 73\n",
      "00:02:30 [DEBUG] train episode 412: reward = 59.00, steps = 59\n",
      "00:02:30 [DEBUG] train episode 413: reward = 26.00, steps = 26\n",
      "00:02:30 [DEBUG] train episode 414: reward = 47.00, steps = 47\n",
      "00:02:30 [DEBUG] train episode 415: reward = 86.00, steps = 86\n",
      "00:02:30 [DEBUG] train episode 416: reward = 97.00, steps = 97\n",
      "00:02:30 [DEBUG] train episode 417: reward = 59.00, steps = 59\n",
      "00:02:30 [DEBUG] train episode 418: reward = 53.00, steps = 53\n",
      "00:02:30 [DEBUG] train episode 419: reward = 71.00, steps = 71\n",
      "00:02:30 [DEBUG] train episode 420: reward = 53.00, steps = 53\n",
      "00:02:30 [DEBUG] train episode 421: reward = 79.00, steps = 79\n",
      "00:02:30 [DEBUG] train episode 422: reward = 39.00, steps = 39\n",
      "00:02:30 [DEBUG] train episode 423: reward = 150.00, steps = 150\n",
      "00:02:30 [DEBUG] train episode 424: reward = 50.00, steps = 50\n",
      "00:02:31 [DEBUG] train episode 425: reward = 64.00, steps = 64\n",
      "00:02:31 [DEBUG] train episode 426: reward = 58.00, steps = 58\n",
      "00:02:31 [DEBUG] train episode 427: reward = 61.00, steps = 61\n",
      "00:02:31 [DEBUG] train episode 428: reward = 90.00, steps = 90\n",
      "00:02:31 [DEBUG] train episode 429: reward = 50.00, steps = 50\n",
      "00:02:31 [DEBUG] train episode 430: reward = 117.00, steps = 117\n",
      "00:02:31 [DEBUG] train episode 431: reward = 82.00, steps = 82\n",
      "00:02:31 [DEBUG] train episode 432: reward = 85.00, steps = 85\n",
      "00:02:31 [DEBUG] train episode 433: reward = 60.00, steps = 60\n",
      "00:02:31 [DEBUG] train episode 434: reward = 111.00, steps = 111\n",
      "00:02:31 [DEBUG] train episode 435: reward = 44.00, steps = 44\n",
      "00:02:31 [DEBUG] train episode 436: reward = 81.00, steps = 81\n",
      "00:02:31 [DEBUG] train episode 437: reward = 63.00, steps = 63\n",
      "00:02:31 [DEBUG] train episode 438: reward = 56.00, steps = 56\n",
      "00:02:31 [DEBUG] train episode 439: reward = 115.00, steps = 115\n",
      "00:02:31 [DEBUG] train episode 440: reward = 78.00, steps = 78\n",
      "00:02:31 [DEBUG] train episode 441: reward = 57.00, steps = 57\n",
      "00:02:31 [DEBUG] train episode 442: reward = 93.00, steps = 93\n",
      "00:02:31 [DEBUG] train episode 443: reward = 104.00, steps = 104\n",
      "00:02:31 [DEBUG] train episode 444: reward = 70.00, steps = 70\n",
      "00:02:31 [DEBUG] train episode 445: reward = 44.00, steps = 44\n",
      "00:02:31 [DEBUG] train episode 446: reward = 59.00, steps = 59\n",
      "00:02:31 [DEBUG] train episode 447: reward = 87.00, steps = 87\n",
      "00:02:31 [DEBUG] train episode 448: reward = 43.00, steps = 43\n",
      "00:02:31 [DEBUG] train episode 449: reward = 55.00, steps = 55\n",
      "00:02:31 [DEBUG] train episode 450: reward = 60.00, steps = 60\n",
      "00:02:31 [DEBUG] train episode 451: reward = 49.00, steps = 49\n",
      "00:02:31 [DEBUG] train episode 452: reward = 84.00, steps = 84\n",
      "00:02:31 [DEBUG] train episode 453: reward = 47.00, steps = 47\n",
      "00:02:31 [DEBUG] train episode 454: reward = 70.00, steps = 70\n",
      "00:02:31 [DEBUG] train episode 455: reward = 63.00, steps = 63\n",
      "00:02:31 [DEBUG] train episode 456: reward = 78.00, steps = 78\n",
      "00:02:31 [DEBUG] train episode 457: reward = 85.00, steps = 85\n",
      "00:02:31 [DEBUG] train episode 458: reward = 65.00, steps = 65\n",
      "00:02:31 [DEBUG] train episode 459: reward = 88.00, steps = 88\n",
      "00:02:31 [DEBUG] train episode 460: reward = 87.00, steps = 87\n",
      "00:02:31 [DEBUG] train episode 461: reward = 67.00, steps = 67\n",
      "00:02:31 [DEBUG] train episode 462: reward = 61.00, steps = 61\n",
      "00:02:31 [DEBUG] train episode 463: reward = 69.00, steps = 69\n",
      "00:02:31 [DEBUG] train episode 464: reward = 52.00, steps = 52\n",
      "00:02:31 [DEBUG] train episode 465: reward = 69.00, steps = 69\n",
      "00:02:31 [DEBUG] train episode 466: reward = 62.00, steps = 62\n",
      "00:02:32 [DEBUG] train episode 467: reward = 71.00, steps = 71\n",
      "00:02:32 [DEBUG] train episode 468: reward = 75.00, steps = 75\n",
      "00:02:32 [DEBUG] train episode 469: reward = 141.00, steps = 141\n",
      "00:02:32 [DEBUG] train episode 470: reward = 57.00, steps = 57\n",
      "00:02:32 [DEBUG] train episode 471: reward = 48.00, steps = 48\n",
      "00:02:32 [DEBUG] train episode 472: reward = 123.00, steps = 123\n",
      "00:02:32 [DEBUG] train episode 473: reward = 83.00, steps = 83\n",
      "00:02:32 [DEBUG] train episode 474: reward = 96.00, steps = 96\n",
      "00:02:32 [DEBUG] train episode 475: reward = 43.00, steps = 43\n",
      "00:02:32 [DEBUG] train episode 476: reward = 51.00, steps = 51\n",
      "00:02:32 [DEBUG] train episode 477: reward = 73.00, steps = 73\n",
      "00:02:32 [DEBUG] train episode 478: reward = 67.00, steps = 67\n",
      "00:02:32 [DEBUG] train episode 479: reward = 63.00, steps = 63\n",
      "00:02:32 [DEBUG] train episode 480: reward = 56.00, steps = 56\n",
      "00:02:32 [DEBUG] train episode 481: reward = 84.00, steps = 84\n",
      "00:02:32 [DEBUG] train episode 482: reward = 57.00, steps = 57\n",
      "00:02:32 [DEBUG] train episode 483: reward = 45.00, steps = 45\n",
      "00:02:32 [DEBUG] train episode 484: reward = 103.00, steps = 103\n",
      "00:02:32 [DEBUG] train episode 485: reward = 59.00, steps = 59\n",
      "00:02:32 [DEBUG] train episode 486: reward = 100.00, steps = 100\n",
      "00:02:32 [DEBUG] train episode 487: reward = 50.00, steps = 50\n",
      "00:02:32 [DEBUG] train episode 488: reward = 70.00, steps = 70\n",
      "00:02:32 [DEBUG] train episode 489: reward = 62.00, steps = 62\n",
      "00:02:32 [DEBUG] train episode 490: reward = 59.00, steps = 59\n",
      "00:02:32 [DEBUG] train episode 491: reward = 71.00, steps = 71\n",
      "00:02:32 [DEBUG] train episode 492: reward = 49.00, steps = 49\n",
      "00:02:32 [DEBUG] train episode 493: reward = 70.00, steps = 70\n",
      "00:02:32 [DEBUG] train episode 494: reward = 78.00, steps = 78\n",
      "00:02:32 [DEBUG] train episode 495: reward = 81.00, steps = 81\n",
      "00:02:32 [DEBUG] train episode 496: reward = 52.00, steps = 52\n",
      "00:02:32 [DEBUG] train episode 497: reward = 44.00, steps = 44\n",
      "00:02:32 [DEBUG] train episode 498: reward = 78.00, steps = 78\n",
      "00:02:32 [DEBUG] train episode 499: reward = 51.00, steps = 51\n",
      "00:02:32 [DEBUG] train episode 500: reward = 60.00, steps = 60\n",
      "00:02:32 [DEBUG] train episode 501: reward = 52.00, steps = 52\n",
      "00:02:32 [DEBUG] train episode 502: reward = 69.00, steps = 69\n",
      "00:02:32 [DEBUG] train episode 503: reward = 74.00, steps = 74\n",
      "00:02:32 [DEBUG] train episode 504: reward = 44.00, steps = 44\n",
      "00:02:32 [DEBUG] train episode 505: reward = 81.00, steps = 81\n",
      "00:02:32 [DEBUG] train episode 506: reward = 78.00, steps = 78\n",
      "00:02:32 [DEBUG] train episode 507: reward = 38.00, steps = 38\n",
      "00:02:32 [DEBUG] train episode 508: reward = 56.00, steps = 56\n",
      "00:02:32 [DEBUG] train episode 509: reward = 58.00, steps = 58\n",
      "00:02:33 [DEBUG] train episode 510: reward = 46.00, steps = 46\n",
      "00:02:33 [DEBUG] train episode 511: reward = 88.00, steps = 88\n",
      "00:02:33 [DEBUG] train episode 512: reward = 81.00, steps = 81\n",
      "00:02:33 [DEBUG] train episode 513: reward = 68.00, steps = 68\n",
      "00:02:33 [DEBUG] train episode 514: reward = 87.00, steps = 87\n",
      "00:02:33 [DEBUG] train episode 515: reward = 79.00, steps = 79\n",
      "00:02:33 [DEBUG] train episode 516: reward = 48.00, steps = 48\n",
      "00:02:33 [DEBUG] train episode 517: reward = 73.00, steps = 73\n",
      "00:02:33 [DEBUG] train episode 518: reward = 138.00, steps = 138\n",
      "00:02:33 [DEBUG] train episode 519: reward = 63.00, steps = 63\n",
      "00:02:33 [DEBUG] train episode 520: reward = 64.00, steps = 64\n",
      "00:02:33 [DEBUG] train episode 521: reward = 56.00, steps = 56\n",
      "00:02:33 [DEBUG] train episode 522: reward = 52.00, steps = 52\n",
      "00:02:33 [DEBUG] train episode 523: reward = 102.00, steps = 102\n",
      "00:02:33 [DEBUG] train episode 524: reward = 64.00, steps = 64\n",
      "00:02:33 [DEBUG] train episode 525: reward = 51.00, steps = 51\n",
      "00:02:33 [DEBUG] train episode 526: reward = 47.00, steps = 47\n",
      "00:02:33 [DEBUG] train episode 527: reward = 83.00, steps = 83\n",
      "00:02:33 [DEBUG] train episode 528: reward = 75.00, steps = 75\n",
      "00:02:33 [DEBUG] train episode 529: reward = 84.00, steps = 84\n",
      "00:02:33 [DEBUG] train episode 530: reward = 66.00, steps = 66\n",
      "00:02:33 [DEBUG] train episode 531: reward = 77.00, steps = 77\n",
      "00:02:33 [DEBUG] train episode 532: reward = 52.00, steps = 52\n",
      "00:02:33 [DEBUG] train episode 533: reward = 51.00, steps = 51\n",
      "00:02:33 [DEBUG] train episode 534: reward = 73.00, steps = 73\n",
      "00:02:33 [DEBUG] train episode 535: reward = 77.00, steps = 77\n",
      "00:02:33 [DEBUG] train episode 536: reward = 81.00, steps = 81\n",
      "00:02:33 [DEBUG] train episode 537: reward = 42.00, steps = 42\n",
      "00:02:33 [DEBUG] train episode 538: reward = 126.00, steps = 126\n",
      "00:02:33 [DEBUG] train episode 539: reward = 47.00, steps = 47\n",
      "00:02:33 [DEBUG] train episode 540: reward = 44.00, steps = 44\n",
      "00:02:33 [DEBUG] train episode 541: reward = 92.00, steps = 92\n",
      "00:02:33 [DEBUG] train episode 542: reward = 75.00, steps = 75\n",
      "00:02:33 [DEBUG] train episode 543: reward = 72.00, steps = 72\n",
      "00:02:33 [DEBUG] train episode 544: reward = 54.00, steps = 54\n",
      "00:02:33 [DEBUG] train episode 545: reward = 75.00, steps = 75\n",
      "00:02:33 [DEBUG] train episode 546: reward = 80.00, steps = 80\n",
      "00:02:33 [DEBUG] train episode 547: reward = 60.00, steps = 60\n",
      "00:02:33 [DEBUG] train episode 548: reward = 75.00, steps = 75\n",
      "00:02:33 [DEBUG] train episode 549: reward = 52.00, steps = 52\n",
      "00:02:34 [DEBUG] train episode 550: reward = 197.00, steps = 197\n",
      "00:02:34 [DEBUG] train episode 551: reward = 64.00, steps = 64\n",
      "00:02:34 [DEBUG] train episode 552: reward = 93.00, steps = 93\n",
      "00:02:34 [DEBUG] train episode 553: reward = 59.00, steps = 59\n",
      "00:02:34 [DEBUG] train episode 554: reward = 80.00, steps = 80\n",
      "00:02:34 [DEBUG] train episode 555: reward = 47.00, steps = 47\n",
      "00:02:34 [DEBUG] train episode 556: reward = 76.00, steps = 76\n",
      "00:02:34 [DEBUG] train episode 557: reward = 77.00, steps = 77\n",
      "00:02:34 [DEBUG] train episode 558: reward = 108.00, steps = 108\n",
      "00:02:34 [DEBUG] train episode 559: reward = 57.00, steps = 57\n",
      "00:02:34 [DEBUG] train episode 560: reward = 136.00, steps = 136\n",
      "00:02:34 [DEBUG] train episode 561: reward = 85.00, steps = 85\n",
      "00:02:34 [DEBUG] train episode 562: reward = 77.00, steps = 77\n",
      "00:02:34 [DEBUG] train episode 563: reward = 70.00, steps = 70\n",
      "00:02:34 [DEBUG] train episode 564: reward = 44.00, steps = 44\n",
      "00:02:34 [DEBUG] train episode 565: reward = 59.00, steps = 59\n",
      "00:02:34 [DEBUG] train episode 566: reward = 65.00, steps = 65\n",
      "00:02:34 [DEBUG] train episode 567: reward = 116.00, steps = 116\n",
      "00:02:34 [DEBUG] train episode 568: reward = 104.00, steps = 104\n",
      "00:02:34 [DEBUG] train episode 569: reward = 60.00, steps = 60\n",
      "00:02:34 [DEBUG] train episode 570: reward = 73.00, steps = 73\n",
      "00:02:34 [DEBUG] train episode 571: reward = 50.00, steps = 50\n",
      "00:02:34 [DEBUG] train episode 572: reward = 48.00, steps = 48\n",
      "00:02:34 [DEBUG] train episode 573: reward = 59.00, steps = 59\n",
      "00:02:34 [DEBUG] train episode 574: reward = 110.00, steps = 110\n",
      "00:02:34 [DEBUG] train episode 575: reward = 76.00, steps = 76\n",
      "00:02:34 [DEBUG] train episode 576: reward = 42.00, steps = 42\n",
      "00:02:34 [DEBUG] train episode 577: reward = 52.00, steps = 52\n",
      "00:02:34 [DEBUG] train episode 578: reward = 98.00, steps = 98\n",
      "00:02:34 [DEBUG] train episode 579: reward = 62.00, steps = 62\n",
      "00:02:34 [DEBUG] train episode 580: reward = 188.00, steps = 188\n",
      "00:02:34 [DEBUG] train episode 581: reward = 55.00, steps = 55\n",
      "00:02:34 [DEBUG] train episode 582: reward = 66.00, steps = 66\n",
      "00:02:34 [DEBUG] train episode 583: reward = 70.00, steps = 70\n",
      "00:02:34 [DEBUG] train episode 584: reward = 68.00, steps = 68\n",
      "00:02:34 [DEBUG] train episode 585: reward = 55.00, steps = 55\n",
      "00:02:34 [DEBUG] train episode 586: reward = 83.00, steps = 83\n",
      "00:02:34 [DEBUG] train episode 587: reward = 74.00, steps = 74\n",
      "00:02:34 [DEBUG] train episode 588: reward = 76.00, steps = 76\n",
      "00:02:35 [DEBUG] train episode 589: reward = 61.00, steps = 61\n",
      "00:02:35 [DEBUG] train episode 590: reward = 80.00, steps = 80\n",
      "00:02:35 [DEBUG] train episode 591: reward = 85.00, steps = 85\n",
      "00:02:35 [DEBUG] train episode 592: reward = 89.00, steps = 89\n",
      "00:02:35 [DEBUG] train episode 593: reward = 69.00, steps = 69\n",
      "00:02:35 [DEBUG] train episode 594: reward = 62.00, steps = 62\n",
      "00:02:35 [DEBUG] train episode 595: reward = 53.00, steps = 53\n",
      "00:02:35 [DEBUG] train episode 596: reward = 61.00, steps = 61\n",
      "00:02:35 [DEBUG] train episode 597: reward = 60.00, steps = 60\n",
      "00:02:35 [DEBUG] train episode 598: reward = 94.00, steps = 94\n",
      "00:02:35 [DEBUG] train episode 599: reward = 84.00, steps = 84\n",
      "00:02:35 [DEBUG] train episode 600: reward = 73.00, steps = 73\n",
      "00:02:35 [DEBUG] train episode 601: reward = 66.00, steps = 66\n",
      "00:02:35 [DEBUG] train episode 602: reward = 127.00, steps = 127\n",
      "00:02:35 [DEBUG] train episode 603: reward = 52.00, steps = 52\n",
      "00:02:35 [DEBUG] train episode 604: reward = 60.00, steps = 60\n",
      "00:02:35 [DEBUG] train episode 605: reward = 84.00, steps = 84\n",
      "00:02:35 [DEBUG] train episode 606: reward = 75.00, steps = 75\n",
      "00:02:35 [DEBUG] train episode 607: reward = 79.00, steps = 79\n",
      "00:02:35 [DEBUG] train episode 608: reward = 80.00, steps = 80\n",
      "00:02:35 [DEBUG] train episode 609: reward = 62.00, steps = 62\n",
      "00:02:35 [DEBUG] train episode 610: reward = 70.00, steps = 70\n",
      "00:02:35 [DEBUG] train episode 611: reward = 78.00, steps = 78\n",
      "00:02:35 [DEBUG] train episode 612: reward = 90.00, steps = 90\n",
      "00:02:35 [DEBUG] train episode 613: reward = 62.00, steps = 62\n",
      "00:02:35 [DEBUG] train episode 614: reward = 74.00, steps = 74\n",
      "00:02:35 [DEBUG] train episode 615: reward = 59.00, steps = 59\n",
      "00:02:35 [DEBUG] train episode 616: reward = 94.00, steps = 94\n",
      "00:02:35 [DEBUG] train episode 617: reward = 50.00, steps = 50\n",
      "00:02:35 [DEBUG] train episode 618: reward = 46.00, steps = 46\n",
      "00:02:35 [DEBUG] train episode 619: reward = 58.00, steps = 58\n",
      "00:02:35 [DEBUG] train episode 620: reward = 84.00, steps = 84\n",
      "00:02:35 [DEBUG] train episode 621: reward = 93.00, steps = 93\n",
      "00:02:35 [DEBUG] train episode 622: reward = 62.00, steps = 62\n",
      "00:02:35 [DEBUG] train episode 623: reward = 63.00, steps = 63\n",
      "00:02:35 [DEBUG] train episode 624: reward = 107.00, steps = 107\n",
      "00:02:35 [DEBUG] train episode 625: reward = 38.00, steps = 38\n",
      "00:02:35 [DEBUG] train episode 626: reward = 56.00, steps = 56\n",
      "00:02:35 [DEBUG] train episode 627: reward = 90.00, steps = 90\n",
      "00:02:35 [DEBUG] train episode 628: reward = 39.00, steps = 39\n",
      "00:02:35 [DEBUG] train episode 629: reward = 143.00, steps = 143\n",
      "00:02:35 [DEBUG] train episode 630: reward = 62.00, steps = 62\n",
      "00:02:36 [DEBUG] train episode 631: reward = 61.00, steps = 61\n",
      "00:02:36 [DEBUG] train episode 632: reward = 131.00, steps = 131\n",
      "00:02:36 [DEBUG] train episode 633: reward = 71.00, steps = 71\n",
      "00:02:36 [DEBUG] train episode 634: reward = 60.00, steps = 60\n",
      "00:02:36 [DEBUG] train episode 635: reward = 65.00, steps = 65\n",
      "00:02:36 [DEBUG] train episode 636: reward = 72.00, steps = 72\n",
      "00:02:36 [DEBUG] train episode 637: reward = 69.00, steps = 69\n",
      "00:02:36 [DEBUG] train episode 638: reward = 69.00, steps = 69\n",
      "00:02:36 [DEBUG] train episode 639: reward = 50.00, steps = 50\n",
      "00:02:36 [DEBUG] train episode 640: reward = 48.00, steps = 48\n",
      "00:02:36 [DEBUG] train episode 641: reward = 64.00, steps = 64\n",
      "00:02:36 [DEBUG] train episode 642: reward = 156.00, steps = 156\n",
      "00:02:36 [DEBUG] train episode 643: reward = 85.00, steps = 85\n",
      "00:02:36 [DEBUG] train episode 644: reward = 95.00, steps = 95\n",
      "00:02:36 [DEBUG] train episode 645: reward = 64.00, steps = 64\n",
      "00:02:36 [DEBUG] train episode 646: reward = 111.00, steps = 111\n",
      "00:02:36 [DEBUG] train episode 647: reward = 66.00, steps = 66\n",
      "00:02:36 [DEBUG] train episode 648: reward = 96.00, steps = 96\n",
      "00:02:36 [DEBUG] train episode 649: reward = 61.00, steps = 61\n",
      "00:02:36 [DEBUG] train episode 650: reward = 83.00, steps = 83\n",
      "00:02:36 [DEBUG] train episode 651: reward = 66.00, steps = 66\n",
      "00:02:36 [DEBUG] train episode 652: reward = 60.00, steps = 60\n",
      "00:02:36 [DEBUG] train episode 653: reward = 67.00, steps = 67\n",
      "00:02:36 [DEBUG] train episode 654: reward = 85.00, steps = 85\n",
      "00:02:36 [DEBUG] train episode 655: reward = 105.00, steps = 105\n",
      "00:02:36 [DEBUG] train episode 656: reward = 78.00, steps = 78\n",
      "00:02:36 [DEBUG] train episode 657: reward = 79.00, steps = 79\n",
      "00:02:36 [DEBUG] train episode 658: reward = 74.00, steps = 74\n",
      "00:02:36 [DEBUG] train episode 659: reward = 50.00, steps = 50\n",
      "00:02:36 [DEBUG] train episode 660: reward = 59.00, steps = 59\n",
      "00:02:36 [DEBUG] train episode 661: reward = 107.00, steps = 107\n",
      "00:02:36 [DEBUG] train episode 662: reward = 92.00, steps = 92\n",
      "00:02:36 [DEBUG] train episode 663: reward = 97.00, steps = 97\n",
      "00:02:36 [DEBUG] train episode 664: reward = 50.00, steps = 50\n",
      "00:02:36 [DEBUG] train episode 665: reward = 54.00, steps = 54\n",
      "00:02:36 [DEBUG] train episode 666: reward = 123.00, steps = 123\n",
      "00:02:36 [DEBUG] train episode 667: reward = 71.00, steps = 71\n",
      "00:02:36 [DEBUG] train episode 668: reward = 63.00, steps = 63\n",
      "00:02:37 [DEBUG] train episode 669: reward = 129.00, steps = 129\n",
      "00:02:37 [DEBUG] train episode 670: reward = 57.00, steps = 57\n",
      "00:02:37 [DEBUG] train episode 671: reward = 58.00, steps = 58\n",
      "00:02:37 [DEBUG] train episode 672: reward = 175.00, steps = 175\n",
      "00:02:37 [DEBUG] train episode 673: reward = 98.00, steps = 98\n",
      "00:02:37 [DEBUG] train episode 674: reward = 64.00, steps = 64\n",
      "00:02:37 [DEBUG] train episode 675: reward = 73.00, steps = 73\n",
      "00:02:37 [DEBUG] train episode 676: reward = 46.00, steps = 46\n",
      "00:02:37 [DEBUG] train episode 677: reward = 64.00, steps = 64\n",
      "00:02:37 [DEBUG] train episode 678: reward = 72.00, steps = 72\n",
      "00:02:37 [DEBUG] train episode 679: reward = 79.00, steps = 79\n",
      "00:02:37 [DEBUG] train episode 680: reward = 80.00, steps = 80\n",
      "00:02:37 [DEBUG] train episode 681: reward = 48.00, steps = 48\n",
      "00:02:37 [DEBUG] train episode 682: reward = 58.00, steps = 58\n",
      "00:02:37 [DEBUG] train episode 683: reward = 61.00, steps = 61\n",
      "00:02:37 [DEBUG] train episode 684: reward = 81.00, steps = 81\n",
      "00:02:37 [DEBUG] train episode 685: reward = 72.00, steps = 72\n",
      "00:02:37 [DEBUG] train episode 686: reward = 81.00, steps = 81\n",
      "00:02:37 [DEBUG] train episode 687: reward = 102.00, steps = 102\n",
      "00:02:37 [DEBUG] train episode 688: reward = 80.00, steps = 80\n",
      "00:02:37 [DEBUG] train episode 689: reward = 82.00, steps = 82\n",
      "00:02:37 [DEBUG] train episode 690: reward = 82.00, steps = 82\n",
      "00:02:37 [DEBUG] train episode 691: reward = 65.00, steps = 65\n",
      "00:02:37 [DEBUG] train episode 692: reward = 78.00, steps = 78\n",
      "00:02:37 [DEBUG] train episode 693: reward = 58.00, steps = 58\n",
      "00:02:37 [DEBUG] train episode 694: reward = 75.00, steps = 75\n",
      "00:02:37 [DEBUG] train episode 695: reward = 58.00, steps = 58\n",
      "00:02:37 [DEBUG] train episode 696: reward = 67.00, steps = 67\n",
      "00:02:37 [DEBUG] train episode 697: reward = 81.00, steps = 81\n",
      "00:02:37 [DEBUG] train episode 698: reward = 80.00, steps = 80\n",
      "00:02:37 [DEBUG] train episode 699: reward = 70.00, steps = 70\n",
      "00:02:37 [DEBUG] train episode 700: reward = 66.00, steps = 66\n",
      "00:02:37 [DEBUG] train episode 701: reward = 94.00, steps = 94\n",
      "00:02:37 [DEBUG] train episode 702: reward = 54.00, steps = 54\n",
      "00:02:37 [DEBUG] train episode 703: reward = 87.00, steps = 87\n",
      "00:02:37 [DEBUG] train episode 704: reward = 110.00, steps = 110\n",
      "00:02:37 [DEBUG] train episode 705: reward = 82.00, steps = 82\n",
      "00:02:37 [DEBUG] train episode 706: reward = 67.00, steps = 67\n",
      "00:02:37 [DEBUG] train episode 707: reward = 75.00, steps = 75\n",
      "00:02:37 [DEBUG] train episode 708: reward = 81.00, steps = 81\n",
      "00:02:37 [DEBUG] train episode 709: reward = 62.00, steps = 62\n",
      "00:02:38 [DEBUG] train episode 710: reward = 69.00, steps = 69\n",
      "00:02:38 [DEBUG] train episode 711: reward = 132.00, steps = 132\n",
      "00:02:38 [DEBUG] train episode 712: reward = 96.00, steps = 96\n",
      "00:02:38 [DEBUG] train episode 713: reward = 53.00, steps = 53\n",
      "00:02:38 [DEBUG] train episode 714: reward = 41.00, steps = 41\n",
      "00:02:38 [DEBUG] train episode 715: reward = 54.00, steps = 54\n",
      "00:02:38 [DEBUG] train episode 716: reward = 48.00, steps = 48\n",
      "00:02:38 [DEBUG] train episode 717: reward = 116.00, steps = 116\n",
      "00:02:38 [DEBUG] train episode 718: reward = 53.00, steps = 53\n",
      "00:02:38 [DEBUG] train episode 719: reward = 96.00, steps = 96\n",
      "00:02:38 [DEBUG] train episode 720: reward = 65.00, steps = 65\n",
      "00:02:38 [DEBUG] train episode 721: reward = 58.00, steps = 58\n",
      "00:02:38 [DEBUG] train episode 722: reward = 88.00, steps = 88\n",
      "00:02:38 [DEBUG] train episode 723: reward = 70.00, steps = 70\n",
      "00:02:38 [DEBUG] train episode 724: reward = 45.00, steps = 45\n",
      "00:02:38 [DEBUG] train episode 725: reward = 80.00, steps = 80\n",
      "00:02:38 [DEBUG] train episode 726: reward = 111.00, steps = 111\n",
      "00:02:38 [DEBUG] train episode 727: reward = 113.00, steps = 113\n",
      "00:02:38 [DEBUG] train episode 728: reward = 83.00, steps = 83\n",
      "00:02:38 [DEBUG] train episode 729: reward = 141.00, steps = 141\n",
      "00:02:38 [DEBUG] train episode 730: reward = 71.00, steps = 71\n",
      "00:02:38 [DEBUG] train episode 731: reward = 102.00, steps = 102\n",
      "00:02:38 [DEBUG] train episode 732: reward = 82.00, steps = 82\n",
      "00:02:38 [DEBUG] train episode 733: reward = 71.00, steps = 71\n",
      "00:02:38 [DEBUG] train episode 734: reward = 54.00, steps = 54\n",
      "00:02:38 [DEBUG] train episode 735: reward = 116.00, steps = 116\n",
      "00:02:38 [DEBUG] train episode 736: reward = 88.00, steps = 88\n",
      "00:02:38 [DEBUG] train episode 737: reward = 68.00, steps = 68\n",
      "00:02:38 [DEBUG] train episode 738: reward = 70.00, steps = 70\n",
      "00:02:38 [DEBUG] train episode 739: reward = 110.00, steps = 110\n",
      "00:02:38 [DEBUG] train episode 740: reward = 71.00, steps = 71\n",
      "00:02:38 [DEBUG] train episode 741: reward = 56.00, steps = 56\n",
      "00:02:38 [DEBUG] train episode 742: reward = 121.00, steps = 121\n",
      "00:02:38 [DEBUG] train episode 743: reward = 104.00, steps = 104\n",
      "00:02:38 [DEBUG] train episode 744: reward = 85.00, steps = 85\n",
      "00:02:38 [DEBUG] train episode 745: reward = 55.00, steps = 55\n",
      "00:02:38 [DEBUG] train episode 746: reward = 81.00, steps = 81\n",
      "00:02:38 [DEBUG] train episode 747: reward = 81.00, steps = 81\n",
      "00:02:39 [DEBUG] train episode 748: reward = 104.00, steps = 104\n",
      "00:02:39 [DEBUG] train episode 749: reward = 62.00, steps = 62\n",
      "00:02:39 [DEBUG] train episode 750: reward = 71.00, steps = 71\n",
      "00:02:39 [DEBUG] train episode 751: reward = 168.00, steps = 168\n",
      "00:02:39 [DEBUG] train episode 752: reward = 136.00, steps = 136\n",
      "00:02:39 [DEBUG] train episode 753: reward = 133.00, steps = 133\n",
      "00:02:39 [DEBUG] train episode 754: reward = 70.00, steps = 70\n",
      "00:02:39 [DEBUG] train episode 755: reward = 100.00, steps = 100\n",
      "00:02:39 [DEBUG] train episode 756: reward = 101.00, steps = 101\n",
      "00:02:39 [DEBUG] train episode 757: reward = 70.00, steps = 70\n",
      "00:02:39 [DEBUG] train episode 758: reward = 57.00, steps = 57\n",
      "00:02:39 [DEBUG] train episode 759: reward = 60.00, steps = 60\n",
      "00:02:39 [DEBUG] train episode 760: reward = 80.00, steps = 80\n",
      "00:02:39 [DEBUG] train episode 761: reward = 79.00, steps = 79\n",
      "00:02:39 [DEBUG] train episode 762: reward = 46.00, steps = 46\n",
      "00:02:39 [DEBUG] train episode 763: reward = 72.00, steps = 72\n",
      "00:02:39 [DEBUG] train episode 764: reward = 65.00, steps = 65\n",
      "00:02:39 [DEBUG] train episode 765: reward = 66.00, steps = 66\n",
      "00:02:39 [DEBUG] train episode 766: reward = 83.00, steps = 83\n",
      "00:02:39 [DEBUG] train episode 767: reward = 67.00, steps = 67\n",
      "00:02:39 [DEBUG] train episode 768: reward = 117.00, steps = 117\n",
      "00:02:39 [DEBUG] train episode 769: reward = 200.00, steps = 200\n",
      "00:02:39 [DEBUG] train episode 770: reward = 62.00, steps = 62\n",
      "00:02:39 [DEBUG] train episode 771: reward = 144.00, steps = 144\n",
      "00:02:39 [DEBUG] train episode 772: reward = 91.00, steps = 91\n",
      "00:02:39 [DEBUG] train episode 773: reward = 58.00, steps = 58\n",
      "00:02:39 [DEBUG] train episode 774: reward = 57.00, steps = 57\n",
      "00:02:39 [DEBUG] train episode 775: reward = 93.00, steps = 93\n",
      "00:02:39 [DEBUG] train episode 776: reward = 198.00, steps = 198\n",
      "00:02:39 [DEBUG] train episode 777: reward = 94.00, steps = 94\n",
      "00:02:39 [DEBUG] train episode 778: reward = 77.00, steps = 77\n",
      "00:02:39 [DEBUG] train episode 779: reward = 73.00, steps = 73\n",
      "00:02:39 [DEBUG] train episode 780: reward = 63.00, steps = 63\n",
      "00:02:39 [DEBUG] train episode 781: reward = 80.00, steps = 80\n",
      "00:02:40 [DEBUG] train episode 782: reward = 96.00, steps = 96\n",
      "00:02:40 [DEBUG] train episode 783: reward = 69.00, steps = 69\n",
      "00:02:40 [DEBUG] train episode 784: reward = 64.00, steps = 64\n",
      "00:02:40 [DEBUG] train episode 785: reward = 75.00, steps = 75\n",
      "00:02:40 [DEBUG] train episode 786: reward = 103.00, steps = 103\n",
      "00:02:40 [DEBUG] train episode 787: reward = 93.00, steps = 93\n",
      "00:02:40 [DEBUG] train episode 788: reward = 73.00, steps = 73\n",
      "00:02:40 [DEBUG] train episode 789: reward = 59.00, steps = 59\n",
      "00:02:40 [DEBUG] train episode 790: reward = 94.00, steps = 94\n",
      "00:02:40 [DEBUG] train episode 791: reward = 200.00, steps = 200\n",
      "00:02:40 [DEBUG] train episode 792: reward = 79.00, steps = 79\n",
      "00:02:40 [DEBUG] train episode 793: reward = 152.00, steps = 152\n",
      "00:02:40 [DEBUG] train episode 794: reward = 78.00, steps = 78\n",
      "00:02:40 [DEBUG] train episode 795: reward = 50.00, steps = 50\n",
      "00:02:40 [DEBUG] train episode 796: reward = 137.00, steps = 137\n",
      "00:02:40 [DEBUG] train episode 797: reward = 57.00, steps = 57\n",
      "00:02:40 [DEBUG] train episode 798: reward = 72.00, steps = 72\n",
      "00:02:40 [DEBUG] train episode 799: reward = 79.00, steps = 79\n",
      "00:02:40 [DEBUG] train episode 800: reward = 122.00, steps = 122\n",
      "00:02:40 [DEBUG] train episode 801: reward = 91.00, steps = 91\n",
      "00:02:40 [DEBUG] train episode 802: reward = 54.00, steps = 54\n",
      "00:02:40 [DEBUG] train episode 803: reward = 68.00, steps = 68\n",
      "00:02:40 [DEBUG] train episode 804: reward = 56.00, steps = 56\n",
      "00:02:40 [DEBUG] train episode 805: reward = 107.00, steps = 107\n",
      "00:02:40 [DEBUG] train episode 806: reward = 70.00, steps = 70\n",
      "00:02:40 [DEBUG] train episode 807: reward = 78.00, steps = 78\n",
      "00:02:40 [DEBUG] train episode 808: reward = 52.00, steps = 52\n",
      "00:02:40 [DEBUG] train episode 809: reward = 76.00, steps = 76\n",
      "00:02:40 [DEBUG] train episode 810: reward = 107.00, steps = 107\n",
      "00:02:40 [DEBUG] train episode 811: reward = 86.00, steps = 86\n",
      "00:02:40 [DEBUG] train episode 812: reward = 128.00, steps = 128\n",
      "00:02:40 [DEBUG] train episode 813: reward = 66.00, steps = 66\n",
      "00:02:40 [DEBUG] train episode 814: reward = 61.00, steps = 61\n",
      "00:02:40 [DEBUG] train episode 815: reward = 69.00, steps = 69\n",
      "00:02:40 [DEBUG] train episode 816: reward = 69.00, steps = 69\n",
      "00:02:40 [DEBUG] train episode 817: reward = 78.00, steps = 78\n",
      "00:02:40 [DEBUG] train episode 818: reward = 73.00, steps = 73\n",
      "00:02:41 [DEBUG] train episode 819: reward = 92.00, steps = 92\n",
      "00:02:41 [DEBUG] train episode 820: reward = 62.00, steps = 62\n",
      "00:02:41 [DEBUG] train episode 821: reward = 63.00, steps = 63\n",
      "00:02:41 [DEBUG] train episode 822: reward = 85.00, steps = 85\n",
      "00:02:41 [DEBUG] train episode 823: reward = 76.00, steps = 76\n",
      "00:02:41 [DEBUG] train episode 824: reward = 85.00, steps = 85\n",
      "00:02:41 [DEBUG] train episode 825: reward = 103.00, steps = 103\n",
      "00:02:41 [DEBUG] train episode 826: reward = 87.00, steps = 87\n",
      "00:02:41 [DEBUG] train episode 827: reward = 74.00, steps = 74\n",
      "00:02:41 [DEBUG] train episode 828: reward = 72.00, steps = 72\n",
      "00:02:41 [DEBUG] train episode 829: reward = 67.00, steps = 67\n",
      "00:02:41 [DEBUG] train episode 830: reward = 130.00, steps = 130\n",
      "00:02:41 [DEBUG] train episode 831: reward = 89.00, steps = 89\n",
      "00:02:41 [DEBUG] train episode 832: reward = 72.00, steps = 72\n",
      "00:02:41 [DEBUG] train episode 833: reward = 114.00, steps = 114\n",
      "00:02:41 [DEBUG] train episode 834: reward = 78.00, steps = 78\n",
      "00:02:41 [DEBUG] train episode 835: reward = 77.00, steps = 77\n",
      "00:02:41 [DEBUG] train episode 836: reward = 72.00, steps = 72\n",
      "00:02:41 [DEBUG] train episode 837: reward = 95.00, steps = 95\n",
      "00:02:41 [DEBUG] train episode 838: reward = 71.00, steps = 71\n",
      "00:02:41 [DEBUG] train episode 839: reward = 124.00, steps = 124\n",
      "00:02:41 [DEBUG] train episode 840: reward = 79.00, steps = 79\n",
      "00:02:41 [DEBUG] train episode 841: reward = 73.00, steps = 73\n",
      "00:02:41 [DEBUG] train episode 842: reward = 126.00, steps = 126\n",
      "00:02:41 [DEBUG] train episode 843: reward = 163.00, steps = 163\n",
      "00:02:41 [DEBUG] train episode 844: reward = 62.00, steps = 62\n",
      "00:02:41 [DEBUG] train episode 845: reward = 61.00, steps = 61\n",
      "00:02:41 [DEBUG] train episode 846: reward = 98.00, steps = 98\n",
      "00:02:41 [DEBUG] train episode 847: reward = 72.00, steps = 72\n",
      "00:02:41 [DEBUG] train episode 848: reward = 147.00, steps = 147\n",
      "00:02:41 [DEBUG] train episode 849: reward = 88.00, steps = 88\n",
      "00:02:41 [DEBUG] train episode 850: reward = 110.00, steps = 110\n",
      "00:02:41 [DEBUG] train episode 851: reward = 64.00, steps = 64\n",
      "00:02:41 [DEBUG] train episode 852: reward = 69.00, steps = 69\n",
      "00:02:41 [DEBUG] train episode 853: reward = 99.00, steps = 99\n",
      "00:02:42 [DEBUG] train episode 854: reward = 89.00, steps = 89\n",
      "00:02:42 [DEBUG] train episode 855: reward = 85.00, steps = 85\n",
      "00:02:42 [DEBUG] train episode 856: reward = 71.00, steps = 71\n",
      "00:02:42 [DEBUG] train episode 857: reward = 114.00, steps = 114\n",
      "00:02:42 [DEBUG] train episode 858: reward = 90.00, steps = 90\n",
      "00:02:42 [DEBUG] train episode 859: reward = 133.00, steps = 133\n",
      "00:02:42 [DEBUG] train episode 860: reward = 63.00, steps = 63\n",
      "00:02:42 [DEBUG] train episode 861: reward = 133.00, steps = 133\n",
      "00:02:42 [DEBUG] train episode 862: reward = 92.00, steps = 92\n",
      "00:02:42 [DEBUG] train episode 863: reward = 45.00, steps = 45\n",
      "00:02:42 [DEBUG] train episode 864: reward = 89.00, steps = 89\n",
      "00:02:42 [DEBUG] train episode 865: reward = 58.00, steps = 58\n",
      "00:02:42 [DEBUG] train episode 866: reward = 52.00, steps = 52\n",
      "00:02:42 [DEBUG] train episode 867: reward = 118.00, steps = 118\n",
      "00:02:42 [DEBUG] train episode 868: reward = 74.00, steps = 74\n",
      "00:02:42 [DEBUG] train episode 869: reward = 74.00, steps = 74\n",
      "00:02:42 [DEBUG] train episode 870: reward = 65.00, steps = 65\n",
      "00:02:42 [DEBUG] train episode 871: reward = 95.00, steps = 95\n",
      "00:02:42 [DEBUG] train episode 872: reward = 105.00, steps = 105\n",
      "00:02:42 [DEBUG] train episode 873: reward = 135.00, steps = 135\n",
      "00:02:42 [DEBUG] train episode 874: reward = 69.00, steps = 69\n",
      "00:02:42 [DEBUG] train episode 875: reward = 76.00, steps = 76\n",
      "00:02:42 [DEBUG] train episode 876: reward = 65.00, steps = 65\n",
      "00:02:42 [DEBUG] train episode 877: reward = 97.00, steps = 97\n",
      "00:02:42 [DEBUG] train episode 878: reward = 107.00, steps = 107\n",
      "00:02:42 [DEBUG] train episode 879: reward = 99.00, steps = 99\n",
      "00:02:42 [DEBUG] train episode 880: reward = 100.00, steps = 100\n",
      "00:02:42 [DEBUG] train episode 881: reward = 101.00, steps = 101\n",
      "00:02:42 [DEBUG] train episode 882: reward = 78.00, steps = 78\n",
      "00:02:42 [DEBUG] train episode 883: reward = 90.00, steps = 90\n",
      "00:02:42 [DEBUG] train episode 884: reward = 65.00, steps = 65\n",
      "00:02:42 [DEBUG] train episode 885: reward = 65.00, steps = 65\n",
      "00:02:42 [DEBUG] train episode 886: reward = 105.00, steps = 105\n",
      "00:02:42 [DEBUG] train episode 887: reward = 82.00, steps = 82\n",
      "00:02:42 [DEBUG] train episode 888: reward = 77.00, steps = 77\n",
      "00:02:43 [DEBUG] train episode 889: reward = 67.00, steps = 67\n",
      "00:02:43 [DEBUG] train episode 890: reward = 84.00, steps = 84\n",
      "00:02:43 [DEBUG] train episode 891: reward = 83.00, steps = 83\n",
      "00:02:43 [DEBUG] train episode 892: reward = 96.00, steps = 96\n",
      "00:02:43 [DEBUG] train episode 893: reward = 62.00, steps = 62\n",
      "00:02:43 [DEBUG] train episode 894: reward = 68.00, steps = 68\n",
      "00:02:43 [DEBUG] train episode 895: reward = 89.00, steps = 89\n",
      "00:02:43 [DEBUG] train episode 896: reward = 126.00, steps = 126\n",
      "00:02:43 [DEBUG] train episode 897: reward = 73.00, steps = 73\n",
      "00:02:43 [DEBUG] train episode 898: reward = 93.00, steps = 93\n",
      "00:02:43 [DEBUG] train episode 899: reward = 69.00, steps = 69\n",
      "00:02:43 [DEBUG] train episode 900: reward = 116.00, steps = 116\n",
      "00:02:43 [DEBUG] train episode 901: reward = 104.00, steps = 104\n",
      "00:02:43 [DEBUG] train episode 902: reward = 93.00, steps = 93\n",
      "00:02:43 [DEBUG] train episode 903: reward = 84.00, steps = 84\n",
      "00:02:43 [DEBUG] train episode 904: reward = 67.00, steps = 67\n",
      "00:02:43 [DEBUG] train episode 905: reward = 71.00, steps = 71\n",
      "00:02:43 [DEBUG] train episode 906: reward = 92.00, steps = 92\n",
      "00:02:43 [DEBUG] train episode 907: reward = 92.00, steps = 92\n",
      "00:02:43 [DEBUG] train episode 908: reward = 83.00, steps = 83\n",
      "00:02:43 [DEBUG] train episode 909: reward = 103.00, steps = 103\n",
      "00:02:43 [DEBUG] train episode 910: reward = 65.00, steps = 65\n",
      "00:02:43 [DEBUG] train episode 911: reward = 61.00, steps = 61\n",
      "00:02:43 [DEBUG] train episode 912: reward = 59.00, steps = 59\n",
      "00:02:43 [DEBUG] train episode 913: reward = 96.00, steps = 96\n",
      "00:02:43 [DEBUG] train episode 914: reward = 60.00, steps = 60\n",
      "00:02:43 [DEBUG] train episode 915: reward = 94.00, steps = 94\n",
      "00:02:43 [DEBUG] train episode 916: reward = 88.00, steps = 88\n",
      "00:02:43 [DEBUG] train episode 917: reward = 83.00, steps = 83\n",
      "00:02:43 [DEBUG] train episode 918: reward = 132.00, steps = 132\n",
      "00:02:43 [DEBUG] train episode 919: reward = 57.00, steps = 57\n",
      "00:02:43 [DEBUG] train episode 920: reward = 94.00, steps = 94\n",
      "00:02:43 [DEBUG] train episode 921: reward = 132.00, steps = 132\n",
      "00:02:43 [DEBUG] train episode 922: reward = 61.00, steps = 61\n",
      "00:02:43 [DEBUG] train episode 923: reward = 55.00, steps = 55\n",
      "00:02:44 [DEBUG] train episode 924: reward = 194.00, steps = 194\n",
      "00:02:44 [DEBUG] train episode 925: reward = 56.00, steps = 56\n",
      "00:02:44 [DEBUG] train episode 926: reward = 67.00, steps = 67\n",
      "00:02:44 [DEBUG] train episode 927: reward = 85.00, steps = 85\n",
      "00:02:44 [DEBUG] train episode 928: reward = 68.00, steps = 68\n",
      "00:02:44 [DEBUG] train episode 929: reward = 76.00, steps = 76\n",
      "00:02:44 [DEBUG] train episode 930: reward = 115.00, steps = 115\n",
      "00:02:44 [DEBUG] train episode 931: reward = 99.00, steps = 99\n",
      "00:02:44 [DEBUG] train episode 932: reward = 61.00, steps = 61\n",
      "00:02:44 [DEBUG] train episode 933: reward = 69.00, steps = 69\n",
      "00:02:44 [DEBUG] train episode 934: reward = 91.00, steps = 91\n",
      "00:02:44 [DEBUG] train episode 935: reward = 85.00, steps = 85\n",
      "00:02:44 [DEBUG] train episode 936: reward = 88.00, steps = 88\n",
      "00:02:44 [DEBUG] train episode 937: reward = 73.00, steps = 73\n",
      "00:02:44 [DEBUG] train episode 938: reward = 78.00, steps = 78\n",
      "00:02:44 [DEBUG] train episode 939: reward = 76.00, steps = 76\n",
      "00:02:44 [DEBUG] train episode 940: reward = 156.00, steps = 156\n",
      "00:02:44 [DEBUG] train episode 941: reward = 112.00, steps = 112\n",
      "00:02:44 [DEBUG] train episode 942: reward = 105.00, steps = 105\n",
      "00:02:44 [DEBUG] train episode 943: reward = 60.00, steps = 60\n",
      "00:02:44 [DEBUG] train episode 944: reward = 84.00, steps = 84\n",
      "00:02:44 [DEBUG] train episode 945: reward = 98.00, steps = 98\n",
      "00:02:44 [DEBUG] train episode 946: reward = 115.00, steps = 115\n",
      "00:02:44 [DEBUG] train episode 947: reward = 83.00, steps = 83\n",
      "00:02:44 [DEBUG] train episode 948: reward = 98.00, steps = 98\n",
      "00:02:44 [DEBUG] train episode 949: reward = 65.00, steps = 65\n",
      "00:02:44 [DEBUG] train episode 950: reward = 107.00, steps = 107\n",
      "00:02:44 [DEBUG] train episode 951: reward = 61.00, steps = 61\n",
      "00:02:44 [DEBUG] train episode 952: reward = 119.00, steps = 119\n",
      "00:02:44 [DEBUG] train episode 953: reward = 75.00, steps = 75\n",
      "00:02:44 [DEBUG] train episode 954: reward = 129.00, steps = 129\n",
      "00:02:44 [DEBUG] train episode 955: reward = 58.00, steps = 58\n",
      "00:02:44 [DEBUG] train episode 956: reward = 115.00, steps = 115\n",
      "00:02:44 [DEBUG] train episode 957: reward = 66.00, steps = 66\n",
      "00:02:44 [DEBUG] train episode 958: reward = 87.00, steps = 87\n",
      "00:02:44 [DEBUG] train episode 959: reward = 104.00, steps = 104\n",
      "00:02:45 [DEBUG] train episode 960: reward = 95.00, steps = 95\n",
      "00:02:45 [DEBUG] train episode 961: reward = 96.00, steps = 96\n",
      "00:02:45 [DEBUG] train episode 962: reward = 128.00, steps = 128\n",
      "00:02:45 [DEBUG] train episode 963: reward = 57.00, steps = 57\n",
      "00:02:45 [DEBUG] train episode 964: reward = 84.00, steps = 84\n",
      "00:02:45 [DEBUG] train episode 965: reward = 94.00, steps = 94\n",
      "00:02:45 [DEBUG] train episode 966: reward = 70.00, steps = 70\n",
      "00:02:45 [DEBUG] train episode 967: reward = 69.00, steps = 69\n",
      "00:02:45 [DEBUG] train episode 968: reward = 83.00, steps = 83\n",
      "00:02:45 [DEBUG] train episode 969: reward = 120.00, steps = 120\n",
      "00:02:45 [DEBUG] train episode 970: reward = 63.00, steps = 63\n",
      "00:02:45 [DEBUG] train episode 971: reward = 77.00, steps = 77\n",
      "00:02:45 [DEBUG] train episode 972: reward = 68.00, steps = 68\n",
      "00:02:45 [DEBUG] train episode 973: reward = 89.00, steps = 89\n",
      "00:02:45 [DEBUG] train episode 974: reward = 71.00, steps = 71\n",
      "00:02:45 [DEBUG] train episode 975: reward = 126.00, steps = 126\n",
      "00:02:45 [DEBUG] train episode 976: reward = 75.00, steps = 75\n",
      "00:02:45 [DEBUG] train episode 977: reward = 79.00, steps = 79\n",
      "00:02:45 [DEBUG] train episode 978: reward = 83.00, steps = 83\n",
      "00:02:45 [DEBUG] train episode 979: reward = 138.00, steps = 138\n",
      "00:02:45 [DEBUG] train episode 980: reward = 87.00, steps = 87\n",
      "00:02:45 [DEBUG] train episode 981: reward = 119.00, steps = 119\n",
      "00:02:45 [DEBUG] train episode 982: reward = 131.00, steps = 131\n",
      "00:02:45 [DEBUG] train episode 983: reward = 85.00, steps = 85\n",
      "00:02:45 [DEBUG] train episode 984: reward = 134.00, steps = 134\n",
      "00:02:45 [DEBUG] train episode 985: reward = 109.00, steps = 109\n",
      "00:02:45 [DEBUG] train episode 986: reward = 81.00, steps = 81\n",
      "00:02:45 [DEBUG] train episode 987: reward = 77.00, steps = 77\n",
      "00:02:45 [DEBUG] train episode 988: reward = 146.00, steps = 146\n",
      "00:02:45 [DEBUG] train episode 989: reward = 127.00, steps = 127\n",
      "00:02:45 [DEBUG] train episode 990: reward = 96.00, steps = 96\n",
      "00:02:45 [DEBUG] train episode 991: reward = 61.00, steps = 61\n",
      "00:02:45 [DEBUG] train episode 992: reward = 84.00, steps = 84\n",
      "00:02:46 [DEBUG] train episode 993: reward = 105.00, steps = 105\n",
      "00:02:46 [DEBUG] train episode 994: reward = 82.00, steps = 82\n",
      "00:02:46 [DEBUG] train episode 995: reward = 88.00, steps = 88\n",
      "00:02:46 [DEBUG] train episode 996: reward = 78.00, steps = 78\n",
      "00:02:46 [DEBUG] train episode 997: reward = 136.00, steps = 136\n",
      "00:02:46 [DEBUG] train episode 998: reward = 108.00, steps = 108\n",
      "00:02:46 [DEBUG] train episode 999: reward = 60.00, steps = 60\n",
      "00:02:46 [DEBUG] train episode 1000: reward = 86.00, steps = 86\n",
      "00:02:46 [DEBUG] train episode 1001: reward = 86.00, steps = 86\n",
      "00:02:46 [DEBUG] train episode 1002: reward = 87.00, steps = 87\n",
      "00:02:46 [DEBUG] train episode 1003: reward = 63.00, steps = 63\n",
      "00:02:46 [DEBUG] train episode 1004: reward = 79.00, steps = 79\n",
      "00:02:46 [DEBUG] train episode 1005: reward = 101.00, steps = 101\n",
      "00:02:46 [DEBUG] train episode 1006: reward = 111.00, steps = 111\n",
      "00:02:46 [DEBUG] train episode 1007: reward = 113.00, steps = 113\n",
      "00:02:46 [DEBUG] train episode 1008: reward = 51.00, steps = 51\n",
      "00:02:46 [DEBUG] train episode 1009: reward = 107.00, steps = 107\n",
      "00:02:46 [DEBUG] train episode 1010: reward = 55.00, steps = 55\n",
      "00:02:46 [DEBUG] train episode 1011: reward = 75.00, steps = 75\n",
      "00:02:46 [DEBUG] train episode 1012: reward = 157.00, steps = 157\n",
      "00:02:46 [DEBUG] train episode 1013: reward = 74.00, steps = 74\n",
      "00:02:46 [DEBUG] train episode 1014: reward = 104.00, steps = 104\n",
      "00:02:46 [DEBUG] train episode 1015: reward = 149.00, steps = 149\n",
      "00:02:46 [DEBUG] train episode 1016: reward = 78.00, steps = 78\n",
      "00:02:46 [DEBUG] train episode 1017: reward = 95.00, steps = 95\n",
      "00:02:46 [DEBUG] train episode 1018: reward = 105.00, steps = 105\n",
      "00:02:46 [DEBUG] train episode 1019: reward = 65.00, steps = 65\n",
      "00:02:46 [DEBUG] train episode 1020: reward = 104.00, steps = 104\n",
      "00:02:46 [DEBUG] train episode 1021: reward = 66.00, steps = 66\n",
      "00:02:46 [DEBUG] train episode 1022: reward = 92.00, steps = 92\n",
      "00:02:46 [DEBUG] train episode 1023: reward = 75.00, steps = 75\n",
      "00:02:46 [DEBUG] train episode 1024: reward = 99.00, steps = 99\n",
      "00:02:46 [DEBUG] train episode 1025: reward = 98.00, steps = 98\n",
      "00:02:46 [DEBUG] train episode 1026: reward = 105.00, steps = 105\n",
      "00:02:47 [DEBUG] train episode 1027: reward = 84.00, steps = 84\n",
      "00:02:47 [DEBUG] train episode 1028: reward = 93.00, steps = 93\n",
      "00:02:47 [DEBUG] train episode 1029: reward = 91.00, steps = 91\n",
      "00:02:47 [DEBUG] train episode 1030: reward = 74.00, steps = 74\n",
      "00:02:47 [DEBUG] train episode 1031: reward = 140.00, steps = 140\n",
      "00:02:47 [DEBUG] train episode 1032: reward = 66.00, steps = 66\n",
      "00:02:47 [DEBUG] train episode 1033: reward = 81.00, steps = 81\n",
      "00:02:47 [DEBUG] train episode 1034: reward = 76.00, steps = 76\n",
      "00:02:47 [DEBUG] train episode 1035: reward = 82.00, steps = 82\n",
      "00:02:47 [DEBUG] train episode 1036: reward = 66.00, steps = 66\n",
      "00:02:47 [DEBUG] train episode 1037: reward = 72.00, steps = 72\n",
      "00:02:47 [DEBUG] train episode 1038: reward = 129.00, steps = 129\n",
      "00:02:47 [DEBUG] train episode 1039: reward = 87.00, steps = 87\n",
      "00:02:47 [DEBUG] train episode 1040: reward = 89.00, steps = 89\n",
      "00:02:47 [DEBUG] train episode 1041: reward = 113.00, steps = 113\n",
      "00:02:47 [DEBUG] train episode 1042: reward = 66.00, steps = 66\n",
      "00:02:47 [DEBUG] train episode 1043: reward = 134.00, steps = 134\n",
      "00:02:47 [DEBUG] train episode 1044: reward = 133.00, steps = 133\n",
      "00:02:47 [DEBUG] train episode 1045: reward = 97.00, steps = 97\n",
      "00:02:47 [DEBUG] train episode 1046: reward = 83.00, steps = 83\n",
      "00:02:47 [DEBUG] train episode 1047: reward = 68.00, steps = 68\n",
      "00:02:47 [DEBUG] train episode 1048: reward = 101.00, steps = 101\n",
      "00:02:47 [DEBUG] train episode 1049: reward = 65.00, steps = 65\n",
      "00:02:47 [DEBUG] train episode 1050: reward = 53.00, steps = 53\n",
      "00:02:47 [DEBUG] train episode 1051: reward = 75.00, steps = 75\n",
      "00:02:47 [DEBUG] train episode 1052: reward = 151.00, steps = 151\n",
      "00:02:47 [DEBUG] train episode 1053: reward = 81.00, steps = 81\n",
      "00:02:47 [DEBUG] train episode 1054: reward = 102.00, steps = 102\n",
      "00:02:47 [DEBUG] train episode 1055: reward = 77.00, steps = 77\n",
      "00:02:47 [DEBUG] train episode 1056: reward = 56.00, steps = 56\n",
      "00:02:47 [DEBUG] train episode 1057: reward = 100.00, steps = 100\n",
      "00:02:47 [DEBUG] train episode 1058: reward = 68.00, steps = 68\n",
      "00:02:47 [DEBUG] train episode 1059: reward = 79.00, steps = 79\n",
      "00:02:48 [DEBUG] train episode 1060: reward = 81.00, steps = 81\n",
      "00:02:48 [DEBUG] train episode 1061: reward = 62.00, steps = 62\n",
      "00:02:48 [DEBUG] train episode 1062: reward = 72.00, steps = 72\n",
      "00:02:48 [DEBUG] train episode 1063: reward = 68.00, steps = 68\n",
      "00:02:48 [DEBUG] train episode 1064: reward = 105.00, steps = 105\n",
      "00:02:48 [DEBUG] train episode 1065: reward = 188.00, steps = 188\n",
      "00:02:48 [DEBUG] train episode 1066: reward = 70.00, steps = 70\n",
      "00:02:48 [DEBUG] train episode 1067: reward = 94.00, steps = 94\n",
      "00:02:48 [DEBUG] train episode 1068: reward = 79.00, steps = 79\n",
      "00:02:48 [DEBUG] train episode 1069: reward = 167.00, steps = 167\n",
      "00:02:48 [DEBUG] train episode 1070: reward = 148.00, steps = 148\n",
      "00:02:48 [DEBUG] train episode 1071: reward = 84.00, steps = 84\n",
      "00:02:48 [DEBUG] train episode 1072: reward = 87.00, steps = 87\n",
      "00:02:48 [DEBUG] train episode 1073: reward = 72.00, steps = 72\n",
      "00:02:48 [DEBUG] train episode 1074: reward = 80.00, steps = 80\n",
      "00:02:48 [DEBUG] train episode 1075: reward = 111.00, steps = 111\n",
      "00:02:48 [DEBUG] train episode 1076: reward = 92.00, steps = 92\n",
      "00:02:48 [DEBUG] train episode 1077: reward = 156.00, steps = 156\n",
      "00:02:48 [DEBUG] train episode 1078: reward = 109.00, steps = 109\n",
      "00:02:48 [DEBUG] train episode 1079: reward = 98.00, steps = 98\n",
      "00:02:48 [DEBUG] train episode 1080: reward = 95.00, steps = 95\n",
      "00:02:48 [DEBUG] train episode 1081: reward = 127.00, steps = 127\n",
      "00:02:48 [DEBUG] train episode 1082: reward = 128.00, steps = 128\n",
      "00:02:48 [DEBUG] train episode 1083: reward = 117.00, steps = 117\n",
      "00:02:48 [DEBUG] train episode 1084: reward = 70.00, steps = 70\n",
      "00:02:48 [DEBUG] train episode 1085: reward = 118.00, steps = 118\n",
      "00:02:48 [DEBUG] train episode 1086: reward = 75.00, steps = 75\n",
      "00:02:48 [DEBUG] train episode 1087: reward = 125.00, steps = 125\n",
      "00:02:48 [DEBUG] train episode 1088: reward = 104.00, steps = 104\n",
      "00:02:48 [DEBUG] train episode 1089: reward = 70.00, steps = 70\n",
      "00:02:48 [DEBUG] train episode 1090: reward = 73.00, steps = 73\n",
      "00:02:48 [DEBUG] train episode 1091: reward = 83.00, steps = 83\n",
      "00:02:49 [DEBUG] train episode 1092: reward = 96.00, steps = 96\n",
      "00:02:49 [DEBUG] train episode 1093: reward = 78.00, steps = 78\n",
      "00:02:49 [DEBUG] train episode 1094: reward = 83.00, steps = 83\n",
      "00:02:49 [DEBUG] train episode 1095: reward = 67.00, steps = 67\n",
      "00:02:49 [DEBUG] train episode 1096: reward = 101.00, steps = 101\n",
      "00:02:49 [DEBUG] train episode 1097: reward = 137.00, steps = 137\n",
      "00:02:49 [DEBUG] train episode 1098: reward = 84.00, steps = 84\n",
      "00:02:49 [DEBUG] train episode 1099: reward = 76.00, steps = 76\n",
      "00:02:49 [DEBUG] train episode 1100: reward = 68.00, steps = 68\n",
      "00:02:49 [DEBUG] train episode 1101: reward = 100.00, steps = 100\n",
      "00:02:49 [DEBUG] train episode 1102: reward = 61.00, steps = 61\n",
      "00:02:49 [DEBUG] train episode 1103: reward = 85.00, steps = 85\n",
      "00:02:49 [DEBUG] train episode 1104: reward = 75.00, steps = 75\n",
      "00:02:49 [DEBUG] train episode 1105: reward = 72.00, steps = 72\n",
      "00:02:49 [DEBUG] train episode 1106: reward = 76.00, steps = 76\n",
      "00:02:49 [DEBUG] train episode 1107: reward = 83.00, steps = 83\n",
      "00:02:49 [DEBUG] train episode 1108: reward = 66.00, steps = 66\n",
      "00:02:49 [DEBUG] train episode 1109: reward = 74.00, steps = 74\n",
      "00:02:49 [DEBUG] train episode 1110: reward = 93.00, steps = 93\n",
      "00:02:49 [DEBUG] train episode 1111: reward = 63.00, steps = 63\n",
      "00:02:49 [DEBUG] train episode 1112: reward = 98.00, steps = 98\n",
      "00:02:49 [DEBUG] train episode 1113: reward = 89.00, steps = 89\n",
      "00:02:49 [DEBUG] train episode 1114: reward = 129.00, steps = 129\n",
      "00:02:49 [DEBUG] train episode 1115: reward = 95.00, steps = 95\n",
      "00:02:49 [DEBUG] train episode 1116: reward = 77.00, steps = 77\n",
      "00:02:49 [DEBUG] train episode 1117: reward = 60.00, steps = 60\n",
      "00:02:49 [DEBUG] train episode 1118: reward = 94.00, steps = 94\n",
      "00:02:49 [DEBUG] train episode 1119: reward = 92.00, steps = 92\n",
      "00:02:49 [DEBUG] train episode 1120: reward = 83.00, steps = 83\n",
      "00:02:49 [DEBUG] train episode 1121: reward = 52.00, steps = 52\n",
      "00:02:49 [DEBUG] train episode 1122: reward = 108.00, steps = 108\n",
      "00:02:49 [DEBUG] train episode 1123: reward = 112.00, steps = 112\n",
      "00:02:49 [DEBUG] train episode 1124: reward = 178.00, steps = 178\n",
      "00:02:49 [DEBUG] train episode 1125: reward = 93.00, steps = 93\n",
      "00:02:49 [DEBUG] train episode 1126: reward = 101.00, steps = 101\n",
      "00:02:50 [DEBUG] train episode 1127: reward = 70.00, steps = 70\n",
      "00:02:50 [DEBUG] train episode 1128: reward = 116.00, steps = 116\n",
      "00:02:50 [DEBUG] train episode 1129: reward = 134.00, steps = 134\n",
      "00:02:50 [DEBUG] train episode 1130: reward = 68.00, steps = 68\n",
      "00:02:50 [DEBUG] train episode 1131: reward = 93.00, steps = 93\n",
      "00:02:50 [DEBUG] train episode 1132: reward = 73.00, steps = 73\n",
      "00:02:50 [DEBUG] train episode 1133: reward = 62.00, steps = 62\n",
      "00:02:50 [DEBUG] train episode 1134: reward = 83.00, steps = 83\n",
      "00:02:50 [DEBUG] train episode 1135: reward = 82.00, steps = 82\n",
      "00:02:50 [DEBUG] train episode 1136: reward = 98.00, steps = 98\n",
      "00:02:50 [DEBUG] train episode 1137: reward = 136.00, steps = 136\n",
      "00:02:50 [DEBUG] train episode 1138: reward = 120.00, steps = 120\n",
      "00:02:50 [DEBUG] train episode 1139: reward = 84.00, steps = 84\n",
      "00:02:50 [DEBUG] train episode 1140: reward = 97.00, steps = 97\n",
      "00:02:50 [DEBUG] train episode 1141: reward = 105.00, steps = 105\n",
      "00:02:50 [DEBUG] train episode 1142: reward = 79.00, steps = 79\n",
      "00:02:50 [DEBUG] train episode 1143: reward = 121.00, steps = 121\n",
      "00:02:50 [DEBUG] train episode 1144: reward = 93.00, steps = 93\n",
      "00:02:50 [DEBUG] train episode 1145: reward = 101.00, steps = 101\n",
      "00:02:50 [DEBUG] train episode 1146: reward = 142.00, steps = 142\n",
      "00:02:50 [DEBUG] train episode 1147: reward = 172.00, steps = 172\n",
      "00:02:50 [DEBUG] train episode 1148: reward = 109.00, steps = 109\n",
      "00:02:50 [DEBUG] train episode 1149: reward = 118.00, steps = 118\n",
      "00:02:50 [DEBUG] train episode 1150: reward = 88.00, steps = 88\n",
      "00:02:50 [DEBUG] train episode 1151: reward = 69.00, steps = 69\n",
      "00:02:50 [DEBUG] train episode 1152: reward = 86.00, steps = 86\n",
      "00:02:50 [DEBUG] train episode 1153: reward = 136.00, steps = 136\n",
      "00:02:50 [DEBUG] train episode 1154: reward = 73.00, steps = 73\n",
      "00:02:50 [DEBUG] train episode 1155: reward = 73.00, steps = 73\n",
      "00:02:50 [DEBUG] train episode 1156: reward = 83.00, steps = 83\n",
      "00:02:50 [DEBUG] train episode 1157: reward = 79.00, steps = 79\n",
      "00:02:50 [DEBUG] train episode 1158: reward = 99.00, steps = 99\n",
      "00:02:51 [DEBUG] train episode 1159: reward = 71.00, steps = 71\n",
      "00:02:51 [DEBUG] train episode 1160: reward = 92.00, steps = 92\n",
      "00:02:51 [DEBUG] train episode 1161: reward = 149.00, steps = 149\n",
      "00:02:51 [DEBUG] train episode 1162: reward = 62.00, steps = 62\n",
      "00:02:51 [DEBUG] train episode 1163: reward = 87.00, steps = 87\n",
      "00:02:51 [DEBUG] train episode 1164: reward = 165.00, steps = 165\n",
      "00:02:51 [DEBUG] train episode 1165: reward = 87.00, steps = 87\n",
      "00:02:51 [DEBUG] train episode 1166: reward = 142.00, steps = 142\n",
      "00:02:51 [DEBUG] train episode 1167: reward = 96.00, steps = 96\n",
      "00:02:51 [DEBUG] train episode 1168: reward = 97.00, steps = 97\n",
      "00:02:51 [DEBUG] train episode 1169: reward = 113.00, steps = 113\n",
      "00:02:51 [DEBUG] train episode 1170: reward = 60.00, steps = 60\n",
      "00:02:51 [DEBUG] train episode 1171: reward = 58.00, steps = 58\n",
      "00:02:51 [DEBUG] train episode 1172: reward = 75.00, steps = 75\n",
      "00:02:51 [DEBUG] train episode 1173: reward = 88.00, steps = 88\n",
      "00:02:51 [DEBUG] train episode 1174: reward = 174.00, steps = 174\n",
      "00:02:51 [DEBUG] train episode 1175: reward = 87.00, steps = 87\n",
      "00:02:51 [DEBUG] train episode 1176: reward = 76.00, steps = 76\n",
      "00:02:51 [DEBUG] train episode 1177: reward = 146.00, steps = 146\n",
      "00:02:51 [DEBUG] train episode 1178: reward = 94.00, steps = 94\n",
      "00:02:51 [DEBUG] train episode 1179: reward = 127.00, steps = 127\n",
      "00:02:51 [DEBUG] train episode 1180: reward = 82.00, steps = 82\n",
      "00:02:51 [DEBUG] train episode 1181: reward = 80.00, steps = 80\n",
      "00:02:51 [DEBUG] train episode 1182: reward = 124.00, steps = 124\n",
      "00:02:51 [DEBUG] train episode 1183: reward = 75.00, steps = 75\n",
      "00:02:51 [DEBUG] train episode 1184: reward = 60.00, steps = 60\n",
      "00:02:51 [DEBUG] train episode 1185: reward = 74.00, steps = 74\n",
      "00:02:51 [DEBUG] train episode 1186: reward = 66.00, steps = 66\n",
      "00:02:51 [DEBUG] train episode 1187: reward = 99.00, steps = 99\n",
      "00:02:51 [DEBUG] train episode 1188: reward = 78.00, steps = 78\n",
      "00:02:51 [DEBUG] train episode 1189: reward = 78.00, steps = 78\n",
      "00:02:51 [DEBUG] train episode 1190: reward = 76.00, steps = 76\n",
      "00:02:51 [DEBUG] train episode 1191: reward = 66.00, steps = 66\n",
      "00:02:51 [DEBUG] train episode 1192: reward = 59.00, steps = 59\n",
      "00:02:52 [DEBUG] train episode 1193: reward = 67.00, steps = 67\n",
      "00:02:52 [DEBUG] train episode 1194: reward = 67.00, steps = 67\n",
      "00:02:52 [DEBUG] train episode 1195: reward = 111.00, steps = 111\n",
      "00:02:52 [DEBUG] train episode 1196: reward = 98.00, steps = 98\n",
      "00:02:52 [DEBUG] train episode 1197: reward = 81.00, steps = 81\n",
      "00:02:52 [DEBUG] train episode 1198: reward = 66.00, steps = 66\n",
      "00:02:52 [DEBUG] train episode 1199: reward = 117.00, steps = 117\n",
      "00:02:52 [DEBUG] train episode 1200: reward = 82.00, steps = 82\n",
      "00:02:52 [DEBUG] train episode 1201: reward = 111.00, steps = 111\n",
      "00:02:52 [DEBUG] train episode 1202: reward = 67.00, steps = 67\n",
      "00:02:52 [DEBUG] train episode 1203: reward = 152.00, steps = 152\n",
      "00:02:52 [DEBUG] train episode 1204: reward = 151.00, steps = 151\n",
      "00:02:52 [DEBUG] train episode 1205: reward = 73.00, steps = 73\n",
      "00:02:52 [DEBUG] train episode 1206: reward = 110.00, steps = 110\n",
      "00:02:52 [DEBUG] train episode 1207: reward = 70.00, steps = 70\n",
      "00:02:52 [DEBUG] train episode 1208: reward = 73.00, steps = 73\n",
      "00:02:52 [DEBUG] train episode 1209: reward = 111.00, steps = 111\n",
      "00:02:52 [DEBUG] train episode 1210: reward = 124.00, steps = 124\n",
      "00:02:52 [DEBUG] train episode 1211: reward = 82.00, steps = 82\n",
      "00:02:52 [DEBUG] train episode 1212: reward = 90.00, steps = 90\n",
      "00:02:52 [DEBUG] train episode 1213: reward = 83.00, steps = 83\n",
      "00:02:52 [DEBUG] train episode 1214: reward = 92.00, steps = 92\n",
      "00:02:52 [DEBUG] train episode 1215: reward = 102.00, steps = 102\n",
      "00:02:52 [DEBUG] train episode 1216: reward = 76.00, steps = 76\n",
      "00:02:52 [DEBUG] train episode 1217: reward = 145.00, steps = 145\n",
      "00:02:52 [DEBUG] train episode 1218: reward = 92.00, steps = 92\n",
      "00:02:52 [DEBUG] train episode 1219: reward = 94.00, steps = 94\n",
      "00:02:52 [DEBUG] train episode 1220: reward = 78.00, steps = 78\n",
      "00:02:52 [DEBUG] train episode 1221: reward = 171.00, steps = 171\n",
      "00:02:52 [DEBUG] train episode 1222: reward = 200.00, steps = 200\n",
      "00:02:53 [DEBUG] train episode 1223: reward = 89.00, steps = 89\n",
      "00:02:53 [DEBUG] train episode 1224: reward = 72.00, steps = 72\n",
      "00:02:53 [DEBUG] train episode 1225: reward = 121.00, steps = 121\n",
      "00:02:53 [DEBUG] train episode 1226: reward = 82.00, steps = 82\n",
      "00:02:53 [DEBUG] train episode 1227: reward = 82.00, steps = 82\n",
      "00:02:53 [DEBUG] train episode 1228: reward = 118.00, steps = 118\n",
      "00:02:53 [DEBUG] train episode 1229: reward = 86.00, steps = 86\n",
      "00:02:53 [DEBUG] train episode 1230: reward = 120.00, steps = 120\n",
      "00:02:53 [DEBUG] train episode 1231: reward = 98.00, steps = 98\n",
      "00:02:53 [DEBUG] train episode 1232: reward = 87.00, steps = 87\n",
      "00:02:53 [DEBUG] train episode 1233: reward = 83.00, steps = 83\n",
      "00:02:53 [DEBUG] train episode 1234: reward = 61.00, steps = 61\n",
      "00:02:53 [DEBUG] train episode 1235: reward = 120.00, steps = 120\n",
      "00:02:53 [DEBUG] train episode 1236: reward = 89.00, steps = 89\n",
      "00:02:53 [DEBUG] train episode 1237: reward = 103.00, steps = 103\n",
      "00:02:53 [DEBUG] train episode 1238: reward = 93.00, steps = 93\n",
      "00:02:53 [DEBUG] train episode 1239: reward = 88.00, steps = 88\n",
      "00:02:53 [DEBUG] train episode 1240: reward = 74.00, steps = 74\n",
      "00:02:53 [DEBUG] train episode 1241: reward = 104.00, steps = 104\n",
      "00:02:53 [DEBUG] train episode 1242: reward = 91.00, steps = 91\n",
      "00:02:53 [DEBUG] train episode 1243: reward = 80.00, steps = 80\n",
      "00:02:53 [DEBUG] train episode 1244: reward = 66.00, steps = 66\n",
      "00:02:53 [DEBUG] train episode 1245: reward = 92.00, steps = 92\n",
      "00:02:53 [DEBUG] train episode 1246: reward = 80.00, steps = 80\n",
      "00:02:53 [DEBUG] train episode 1247: reward = 200.00, steps = 200\n",
      "00:02:53 [DEBUG] train episode 1248: reward = 119.00, steps = 119\n",
      "00:02:53 [DEBUG] train episode 1249: reward = 130.00, steps = 130\n",
      "00:02:53 [DEBUG] train episode 1250: reward = 187.00, steps = 187\n",
      "00:02:53 [DEBUG] train episode 1251: reward = 137.00, steps = 137\n",
      "00:02:54 [DEBUG] train episode 1252: reward = 172.00, steps = 172\n",
      "00:02:54 [DEBUG] train episode 1253: reward = 140.00, steps = 140\n",
      "00:02:54 [DEBUG] train episode 1254: reward = 103.00, steps = 103\n",
      "00:02:54 [DEBUG] train episode 1255: reward = 122.00, steps = 122\n",
      "00:02:54 [DEBUG] train episode 1256: reward = 77.00, steps = 77\n",
      "00:02:54 [DEBUG] train episode 1257: reward = 139.00, steps = 139\n",
      "00:02:54 [DEBUG] train episode 1258: reward = 98.00, steps = 98\n",
      "00:02:54 [DEBUG] train episode 1259: reward = 200.00, steps = 200\n",
      "00:02:54 [DEBUG] train episode 1260: reward = 137.00, steps = 137\n",
      "00:02:54 [DEBUG] train episode 1261: reward = 119.00, steps = 119\n",
      "00:02:54 [DEBUG] train episode 1262: reward = 82.00, steps = 82\n",
      "00:02:54 [DEBUG] train episode 1263: reward = 85.00, steps = 85\n",
      "00:02:54 [DEBUG] train episode 1264: reward = 108.00, steps = 108\n",
      "00:02:54 [DEBUG] train episode 1265: reward = 178.00, steps = 178\n",
      "00:02:54 [DEBUG] train episode 1266: reward = 107.00, steps = 107\n",
      "00:02:54 [DEBUG] train episode 1267: reward = 134.00, steps = 134\n",
      "00:02:54 [DEBUG] train episode 1268: reward = 135.00, steps = 135\n",
      "00:02:54 [DEBUG] train episode 1269: reward = 108.00, steps = 108\n",
      "00:02:54 [DEBUG] train episode 1270: reward = 97.00, steps = 97\n",
      "00:02:54 [DEBUG] train episode 1271: reward = 103.00, steps = 103\n",
      "00:02:54 [DEBUG] train episode 1272: reward = 68.00, steps = 68\n",
      "00:02:54 [DEBUG] train episode 1273: reward = 100.00, steps = 100\n",
      "00:02:54 [DEBUG] train episode 1274: reward = 99.00, steps = 99\n",
      "00:02:54 [DEBUG] train episode 1275: reward = 90.00, steps = 90\n",
      "00:02:54 [DEBUG] train episode 1276: reward = 121.00, steps = 121\n",
      "00:02:54 [DEBUG] train episode 1277: reward = 181.00, steps = 181\n",
      "00:02:54 [DEBUG] train episode 1278: reward = 113.00, steps = 113\n",
      "00:02:54 [DEBUG] train episode 1279: reward = 71.00, steps = 71\n",
      "00:02:55 [DEBUG] train episode 1280: reward = 89.00, steps = 89\n",
      "00:02:55 [DEBUG] train episode 1281: reward = 117.00, steps = 117\n",
      "00:02:55 [DEBUG] train episode 1282: reward = 85.00, steps = 85\n",
      "00:02:55 [DEBUG] train episode 1283: reward = 170.00, steps = 170\n",
      "00:02:55 [DEBUG] train episode 1284: reward = 108.00, steps = 108\n",
      "00:02:55 [DEBUG] train episode 1285: reward = 119.00, steps = 119\n",
      "00:02:55 [DEBUG] train episode 1286: reward = 122.00, steps = 122\n",
      "00:02:55 [DEBUG] train episode 1287: reward = 114.00, steps = 114\n",
      "00:02:55 [DEBUG] train episode 1288: reward = 76.00, steps = 76\n",
      "00:02:55 [DEBUG] train episode 1289: reward = 94.00, steps = 94\n",
      "00:02:55 [DEBUG] train episode 1290: reward = 76.00, steps = 76\n",
      "00:02:55 [DEBUG] train episode 1291: reward = 73.00, steps = 73\n",
      "00:02:55 [DEBUG] train episode 1292: reward = 151.00, steps = 151\n",
      "00:02:55 [DEBUG] train episode 1293: reward = 92.00, steps = 92\n",
      "00:02:55 [DEBUG] train episode 1294: reward = 150.00, steps = 150\n",
      "00:02:55 [DEBUG] train episode 1295: reward = 99.00, steps = 99\n",
      "00:02:55 [DEBUG] train episode 1296: reward = 90.00, steps = 90\n",
      "00:02:55 [DEBUG] train episode 1297: reward = 152.00, steps = 152\n",
      "00:02:55 [DEBUG] train episode 1298: reward = 96.00, steps = 96\n",
      "00:02:55 [DEBUG] train episode 1299: reward = 86.00, steps = 86\n",
      "00:02:55 [DEBUG] train episode 1300: reward = 81.00, steps = 81\n",
      "00:02:55 [DEBUG] train episode 1301: reward = 76.00, steps = 76\n",
      "00:02:55 [DEBUG] train episode 1302: reward = 130.00, steps = 130\n",
      "00:02:55 [DEBUG] train episode 1303: reward = 88.00, steps = 88\n",
      "00:02:55 [DEBUG] train episode 1304: reward = 131.00, steps = 131\n",
      "00:02:55 [DEBUG] train episode 1305: reward = 94.00, steps = 94\n",
      "00:02:55 [DEBUG] train episode 1306: reward = 123.00, steps = 123\n",
      "00:02:55 [DEBUG] train episode 1307: reward = 141.00, steps = 141\n",
      "00:02:55 [DEBUG] train episode 1308: reward = 117.00, steps = 117\n",
      "00:02:56 [DEBUG] train episode 1309: reward = 82.00, steps = 82\n",
      "00:02:56 [DEBUG] train episode 1310: reward = 135.00, steps = 135\n",
      "00:02:56 [DEBUG] train episode 1311: reward = 93.00, steps = 93\n",
      "00:02:56 [DEBUG] train episode 1312: reward = 75.00, steps = 75\n",
      "00:02:56 [DEBUG] train episode 1313: reward = 127.00, steps = 127\n",
      "00:02:56 [DEBUG] train episode 1314: reward = 100.00, steps = 100\n",
      "00:02:56 [DEBUG] train episode 1315: reward = 81.00, steps = 81\n",
      "00:02:56 [DEBUG] train episode 1316: reward = 139.00, steps = 139\n",
      "00:02:56 [DEBUG] train episode 1317: reward = 132.00, steps = 132\n",
      "00:02:56 [DEBUG] train episode 1318: reward = 100.00, steps = 100\n",
      "00:02:56 [DEBUG] train episode 1319: reward = 200.00, steps = 200\n",
      "00:02:56 [DEBUG] train episode 1320: reward = 93.00, steps = 93\n",
      "00:02:56 [DEBUG] train episode 1321: reward = 125.00, steps = 125\n",
      "00:02:56 [DEBUG] train episode 1322: reward = 164.00, steps = 164\n",
      "00:02:56 [DEBUG] train episode 1323: reward = 153.00, steps = 153\n",
      "00:02:56 [DEBUG] train episode 1324: reward = 118.00, steps = 118\n",
      "00:02:56 [DEBUG] train episode 1325: reward = 78.00, steps = 78\n",
      "00:02:56 [DEBUG] train episode 1326: reward = 95.00, steps = 95\n",
      "00:02:56 [DEBUG] train episode 1327: reward = 78.00, steps = 78\n",
      "00:02:56 [DEBUG] train episode 1328: reward = 140.00, steps = 140\n",
      "00:02:56 [DEBUG] train episode 1329: reward = 109.00, steps = 109\n",
      "00:02:56 [DEBUG] train episode 1330: reward = 149.00, steps = 149\n",
      "00:02:56 [DEBUG] train episode 1331: reward = 138.00, steps = 138\n",
      "00:02:56 [DEBUG] train episode 1332: reward = 131.00, steps = 131\n",
      "00:02:56 [DEBUG] train episode 1333: reward = 155.00, steps = 155\n",
      "00:02:56 [DEBUG] train episode 1334: reward = 143.00, steps = 143\n",
      "00:02:57 [DEBUG] train episode 1335: reward = 122.00, steps = 122\n",
      "00:02:57 [DEBUG] train episode 1336: reward = 105.00, steps = 105\n",
      "00:02:57 [DEBUG] train episode 1337: reward = 177.00, steps = 177\n",
      "00:02:57 [DEBUG] train episode 1338: reward = 145.00, steps = 145\n",
      "00:02:57 [DEBUG] train episode 1339: reward = 118.00, steps = 118\n",
      "00:02:57 [DEBUG] train episode 1340: reward = 111.00, steps = 111\n",
      "00:02:57 [DEBUG] train episode 1341: reward = 105.00, steps = 105\n",
      "00:02:57 [DEBUG] train episode 1342: reward = 101.00, steps = 101\n",
      "00:02:57 [DEBUG] train episode 1343: reward = 198.00, steps = 198\n",
      "00:02:57 [DEBUG] train episode 1344: reward = 108.00, steps = 108\n",
      "00:02:57 [DEBUG] train episode 1345: reward = 139.00, steps = 139\n",
      "00:02:57 [DEBUG] train episode 1346: reward = 133.00, steps = 133\n",
      "00:02:57 [DEBUG] train episode 1347: reward = 148.00, steps = 148\n",
      "00:02:57 [DEBUG] train episode 1348: reward = 143.00, steps = 143\n",
      "00:02:57 [DEBUG] train episode 1349: reward = 137.00, steps = 137\n",
      "00:02:57 [DEBUG] train episode 1350: reward = 139.00, steps = 139\n",
      "00:02:57 [DEBUG] train episode 1351: reward = 79.00, steps = 79\n",
      "00:02:57 [DEBUG] train episode 1352: reward = 91.00, steps = 91\n",
      "00:02:57 [DEBUG] train episode 1353: reward = 136.00, steps = 136\n",
      "00:02:57 [DEBUG] train episode 1354: reward = 90.00, steps = 90\n",
      "00:02:57 [DEBUG] train episode 1355: reward = 133.00, steps = 133\n",
      "00:02:57 [DEBUG] train episode 1356: reward = 119.00, steps = 119\n",
      "00:02:57 [DEBUG] train episode 1357: reward = 121.00, steps = 121\n",
      "00:02:57 [DEBUG] train episode 1358: reward = 130.00, steps = 130\n",
      "00:02:57 [DEBUG] train episode 1359: reward = 110.00, steps = 110\n",
      "00:02:58 [DEBUG] train episode 1360: reward = 123.00, steps = 123\n",
      "00:02:58 [DEBUG] train episode 1361: reward = 96.00, steps = 96\n",
      "00:02:58 [DEBUG] train episode 1362: reward = 156.00, steps = 156\n",
      "00:02:58 [DEBUG] train episode 1363: reward = 94.00, steps = 94\n",
      "00:02:58 [DEBUG] train episode 1364: reward = 111.00, steps = 111\n",
      "00:02:58 [DEBUG] train episode 1365: reward = 200.00, steps = 200\n",
      "00:02:58 [DEBUG] train episode 1366: reward = 151.00, steps = 151\n",
      "00:02:58 [DEBUG] train episode 1367: reward = 149.00, steps = 149\n",
      "00:02:58 [DEBUG] train episode 1368: reward = 155.00, steps = 155\n",
      "00:02:58 [DEBUG] train episode 1369: reward = 144.00, steps = 144\n",
      "00:02:58 [DEBUG] train episode 1370: reward = 90.00, steps = 90\n",
      "00:02:58 [DEBUG] train episode 1371: reward = 120.00, steps = 120\n",
      "00:02:58 [DEBUG] train episode 1372: reward = 124.00, steps = 124\n",
      "00:02:58 [DEBUG] train episode 1373: reward = 118.00, steps = 118\n",
      "00:02:58 [DEBUG] train episode 1374: reward = 169.00, steps = 169\n",
      "00:02:58 [DEBUG] train episode 1375: reward = 113.00, steps = 113\n",
      "00:02:58 [DEBUG] train episode 1376: reward = 111.00, steps = 111\n",
      "00:02:58 [DEBUG] train episode 1377: reward = 149.00, steps = 149\n",
      "00:02:58 [DEBUG] train episode 1378: reward = 143.00, steps = 143\n",
      "00:02:58 [DEBUG] train episode 1379: reward = 88.00, steps = 88\n",
      "00:02:58 [DEBUG] train episode 1380: reward = 120.00, steps = 120\n",
      "00:02:58 [DEBUG] train episode 1381: reward = 106.00, steps = 106\n",
      "00:02:58 [DEBUG] train episode 1382: reward = 109.00, steps = 109\n",
      "00:02:58 [DEBUG] train episode 1383: reward = 145.00, steps = 145\n",
      "00:02:58 [DEBUG] train episode 1384: reward = 115.00, steps = 115\n",
      "00:02:59 [DEBUG] train episode 1385: reward = 104.00, steps = 104\n",
      "00:02:59 [DEBUG] train episode 1386: reward = 98.00, steps = 98\n",
      "00:02:59 [DEBUG] train episode 1387: reward = 106.00, steps = 106\n",
      "00:02:59 [DEBUG] train episode 1388: reward = 200.00, steps = 200\n",
      "00:02:59 [DEBUG] train episode 1389: reward = 127.00, steps = 127\n",
      "00:02:59 [DEBUG] train episode 1390: reward = 133.00, steps = 133\n",
      "00:02:59 [DEBUG] train episode 1391: reward = 107.00, steps = 107\n",
      "00:02:59 [DEBUG] train episode 1392: reward = 153.00, steps = 153\n",
      "00:02:59 [DEBUG] train episode 1393: reward = 125.00, steps = 125\n",
      "00:02:59 [DEBUG] train episode 1394: reward = 106.00, steps = 106\n",
      "00:02:59 [DEBUG] train episode 1395: reward = 116.00, steps = 116\n",
      "00:02:59 [DEBUG] train episode 1396: reward = 161.00, steps = 161\n",
      "00:02:59 [DEBUG] train episode 1397: reward = 118.00, steps = 118\n",
      "00:02:59 [DEBUG] train episode 1398: reward = 77.00, steps = 77\n",
      "00:02:59 [DEBUG] train episode 1399: reward = 102.00, steps = 102\n",
      "00:02:59 [DEBUG] train episode 1400: reward = 92.00, steps = 92\n",
      "00:02:59 [DEBUG] train episode 1401: reward = 129.00, steps = 129\n",
      "00:02:59 [DEBUG] train episode 1402: reward = 118.00, steps = 118\n",
      "00:02:59 [DEBUG] train episode 1403: reward = 91.00, steps = 91\n",
      "00:02:59 [DEBUG] train episode 1404: reward = 133.00, steps = 133\n",
      "00:02:59 [DEBUG] train episode 1405: reward = 164.00, steps = 164\n",
      "00:02:59 [DEBUG] train episode 1406: reward = 113.00, steps = 113\n",
      "00:02:59 [DEBUG] train episode 1407: reward = 163.00, steps = 163\n",
      "00:02:59 [DEBUG] train episode 1408: reward = 160.00, steps = 160\n",
      "00:02:59 [DEBUG] train episode 1409: reward = 180.00, steps = 180\n",
      "00:02:59 [DEBUG] train episode 1410: reward = 93.00, steps = 93\n",
      "00:03:00 [DEBUG] train episode 1411: reward = 200.00, steps = 200\n",
      "00:03:00 [DEBUG] train episode 1412: reward = 168.00, steps = 168\n",
      "00:03:00 [DEBUG] train episode 1413: reward = 127.00, steps = 127\n",
      "00:03:00 [DEBUG] train episode 1414: reward = 174.00, steps = 174\n",
      "00:03:00 [DEBUG] train episode 1415: reward = 86.00, steps = 86\n",
      "00:03:00 [DEBUG] train episode 1416: reward = 105.00, steps = 105\n",
      "00:03:00 [DEBUG] train episode 1417: reward = 96.00, steps = 96\n",
      "00:03:00 [DEBUG] train episode 1418: reward = 85.00, steps = 85\n",
      "00:03:00 [DEBUG] train episode 1419: reward = 136.00, steps = 136\n",
      "00:03:00 [DEBUG] train episode 1420: reward = 175.00, steps = 175\n",
      "00:03:00 [DEBUG] train episode 1421: reward = 117.00, steps = 117\n",
      "00:03:00 [DEBUG] train episode 1422: reward = 130.00, steps = 130\n",
      "00:03:00 [DEBUG] train episode 1423: reward = 124.00, steps = 124\n",
      "00:03:00 [DEBUG] train episode 1424: reward = 127.00, steps = 127\n",
      "00:03:00 [DEBUG] train episode 1425: reward = 105.00, steps = 105\n",
      "00:03:00 [DEBUG] train episode 1426: reward = 200.00, steps = 200\n",
      "00:03:00 [DEBUG] train episode 1427: reward = 119.00, steps = 119\n",
      "00:03:00 [DEBUG] train episode 1428: reward = 94.00, steps = 94\n",
      "00:03:00 [DEBUG] train episode 1429: reward = 122.00, steps = 122\n",
      "00:03:00 [DEBUG] train episode 1430: reward = 126.00, steps = 126\n",
      "00:03:00 [DEBUG] train episode 1431: reward = 86.00, steps = 86\n",
      "00:03:00 [DEBUG] train episode 1432: reward = 120.00, steps = 120\n",
      "00:03:00 [DEBUG] train episode 1433: reward = 157.00, steps = 157\n",
      "00:03:00 [DEBUG] train episode 1434: reward = 106.00, steps = 106\n",
      "00:03:00 [DEBUG] train episode 1435: reward = 129.00, steps = 129\n",
      "00:03:01 [DEBUG] train episode 1436: reward = 123.00, steps = 123\n",
      "00:03:01 [DEBUG] train episode 1437: reward = 133.00, steps = 133\n",
      "00:03:01 [DEBUG] train episode 1438: reward = 116.00, steps = 116\n",
      "00:03:01 [DEBUG] train episode 1439: reward = 106.00, steps = 106\n",
      "00:03:01 [DEBUG] train episode 1440: reward = 135.00, steps = 135\n",
      "00:03:01 [DEBUG] train episode 1441: reward = 132.00, steps = 132\n",
      "00:03:01 [DEBUG] train episode 1442: reward = 80.00, steps = 80\n",
      "00:03:01 [DEBUG] train episode 1443: reward = 190.00, steps = 190\n",
      "00:03:01 [DEBUG] train episode 1444: reward = 154.00, steps = 154\n",
      "00:03:01 [DEBUG] train episode 1445: reward = 104.00, steps = 104\n",
      "00:03:01 [DEBUG] train episode 1446: reward = 93.00, steps = 93\n",
      "00:03:01 [DEBUG] train episode 1447: reward = 134.00, steps = 134\n",
      "00:03:01 [DEBUG] train episode 1448: reward = 111.00, steps = 111\n",
      "00:03:01 [DEBUG] train episode 1449: reward = 91.00, steps = 91\n",
      "00:03:01 [DEBUG] train episode 1450: reward = 174.00, steps = 174\n",
      "00:03:01 [DEBUG] train episode 1451: reward = 200.00, steps = 200\n",
      "00:03:01 [DEBUG] train episode 1452: reward = 87.00, steps = 87\n",
      "00:03:01 [DEBUG] train episode 1453: reward = 155.00, steps = 155\n",
      "00:03:01 [DEBUG] train episode 1454: reward = 200.00, steps = 200\n",
      "00:03:01 [DEBUG] train episode 1455: reward = 139.00, steps = 139\n",
      "00:03:01 [DEBUG] train episode 1456: reward = 152.00, steps = 152\n",
      "00:03:01 [DEBUG] train episode 1457: reward = 123.00, steps = 123\n",
      "00:03:01 [DEBUG] train episode 1458: reward = 117.00, steps = 117\n",
      "00:03:01 [DEBUG] train episode 1459: reward = 128.00, steps = 128\n",
      "00:03:01 [DEBUG] train episode 1460: reward = 102.00, steps = 102\n",
      "00:03:02 [DEBUG] train episode 1461: reward = 138.00, steps = 138\n",
      "00:03:02 [DEBUG] train episode 1462: reward = 99.00, steps = 99\n",
      "00:03:02 [DEBUG] train episode 1463: reward = 123.00, steps = 123\n",
      "00:03:02 [DEBUG] train episode 1464: reward = 164.00, steps = 164\n",
      "00:03:02 [DEBUG] train episode 1465: reward = 143.00, steps = 143\n",
      "00:03:02 [DEBUG] train episode 1466: reward = 198.00, steps = 198\n",
      "00:03:02 [DEBUG] train episode 1467: reward = 114.00, steps = 114\n",
      "00:03:02 [DEBUG] train episode 1468: reward = 118.00, steps = 118\n",
      "00:03:02 [DEBUG] train episode 1469: reward = 200.00, steps = 200\n",
      "00:03:02 [DEBUG] train episode 1470: reward = 142.00, steps = 142\n",
      "00:03:02 [DEBUG] train episode 1471: reward = 106.00, steps = 106\n",
      "00:03:02 [DEBUG] train episode 1472: reward = 123.00, steps = 123\n",
      "00:03:02 [DEBUG] train episode 1473: reward = 139.00, steps = 139\n",
      "00:03:02 [DEBUG] train episode 1474: reward = 149.00, steps = 149\n",
      "00:03:02 [DEBUG] train episode 1475: reward = 108.00, steps = 108\n",
      "00:03:02 [DEBUG] train episode 1476: reward = 120.00, steps = 120\n",
      "00:03:02 [DEBUG] train episode 1477: reward = 123.00, steps = 123\n",
      "00:03:02 [DEBUG] train episode 1478: reward = 147.00, steps = 147\n",
      "00:03:02 [DEBUG] train episode 1479: reward = 117.00, steps = 117\n",
      "00:03:02 [DEBUG] train episode 1480: reward = 105.00, steps = 105\n",
      "00:03:02 [DEBUG] train episode 1481: reward = 95.00, steps = 95\n",
      "00:03:02 [DEBUG] train episode 1482: reward = 179.00, steps = 179\n",
      "00:03:02 [DEBUG] train episode 1483: reward = 118.00, steps = 118\n",
      "00:03:02 [DEBUG] train episode 1484: reward = 136.00, steps = 136\n",
      "00:03:02 [DEBUG] train episode 1485: reward = 118.00, steps = 118\n",
      "00:03:03 [DEBUG] train episode 1486: reward = 187.00, steps = 187\n",
      "00:03:03 [DEBUG] train episode 1487: reward = 109.00, steps = 109\n",
      "00:03:03 [DEBUG] train episode 1488: reward = 138.00, steps = 138\n",
      "00:03:03 [DEBUG] train episode 1489: reward = 139.00, steps = 139\n",
      "00:03:03 [DEBUG] train episode 1490: reward = 133.00, steps = 133\n",
      "00:03:03 [DEBUG] train episode 1491: reward = 190.00, steps = 190\n",
      "00:03:03 [DEBUG] train episode 1492: reward = 158.00, steps = 158\n",
      "00:03:03 [DEBUG] train episode 1493: reward = 146.00, steps = 146\n",
      "00:03:03 [DEBUG] train episode 1494: reward = 105.00, steps = 105\n",
      "00:03:03 [DEBUG] train episode 1495: reward = 105.00, steps = 105\n",
      "00:03:03 [DEBUG] train episode 1496: reward = 124.00, steps = 124\n",
      "00:03:03 [DEBUG] train episode 1497: reward = 142.00, steps = 142\n",
      "00:03:03 [DEBUG] train episode 1498: reward = 200.00, steps = 200\n",
      "00:03:03 [DEBUG] train episode 1499: reward = 170.00, steps = 170\n",
      "00:03:03 [DEBUG] train episode 1500: reward = 200.00, steps = 200\n",
      "00:03:03 [DEBUG] train episode 1501: reward = 131.00, steps = 131\n",
      "00:03:03 [DEBUG] train episode 1502: reward = 106.00, steps = 106\n",
      "00:03:03 [DEBUG] train episode 1503: reward = 166.00, steps = 166\n",
      "00:03:03 [DEBUG] train episode 1504: reward = 123.00, steps = 123\n",
      "00:03:03 [DEBUG] train episode 1505: reward = 150.00, steps = 150\n",
      "00:03:03 [DEBUG] train episode 1506: reward = 170.00, steps = 170\n",
      "00:03:03 [DEBUG] train episode 1507: reward = 153.00, steps = 153\n",
      "00:03:04 [DEBUG] train episode 1508: reward = 109.00, steps = 109\n",
      "00:03:04 [DEBUG] train episode 1509: reward = 200.00, steps = 200\n",
      "00:03:04 [DEBUG] train episode 1510: reward = 200.00, steps = 200\n",
      "00:03:04 [DEBUG] train episode 1511: reward = 187.00, steps = 187\n",
      "00:03:04 [DEBUG] train episode 1512: reward = 131.00, steps = 131\n",
      "00:03:04 [DEBUG] train episode 1513: reward = 200.00, steps = 200\n",
      "00:03:04 [DEBUG] train episode 1514: reward = 158.00, steps = 158\n",
      "00:03:04 [DEBUG] train episode 1515: reward = 193.00, steps = 193\n",
      "00:03:04 [DEBUG] train episode 1516: reward = 140.00, steps = 140\n",
      "00:03:04 [DEBUG] train episode 1517: reward = 200.00, steps = 200\n",
      "00:03:04 [DEBUG] train episode 1518: reward = 125.00, steps = 125\n",
      "00:03:04 [DEBUG] train episode 1519: reward = 153.00, steps = 153\n",
      "00:03:04 [DEBUG] train episode 1520: reward = 141.00, steps = 141\n",
      "00:03:04 [DEBUG] train episode 1521: reward = 163.00, steps = 163\n",
      "00:03:04 [DEBUG] train episode 1522: reward = 151.00, steps = 151\n",
      "00:03:04 [DEBUG] train episode 1523: reward = 139.00, steps = 139\n",
      "00:03:04 [DEBUG] train episode 1524: reward = 101.00, steps = 101\n",
      "00:03:04 [DEBUG] train episode 1525: reward = 200.00, steps = 200\n",
      "00:03:04 [DEBUG] train episode 1526: reward = 148.00, steps = 148\n",
      "00:03:04 [DEBUG] train episode 1527: reward = 135.00, steps = 135\n",
      "00:03:04 [DEBUG] train episode 1528: reward = 150.00, steps = 150\n",
      "00:03:05 [DEBUG] train episode 1529: reward = 186.00, steps = 186\n",
      "00:03:05 [DEBUG] train episode 1530: reward = 176.00, steps = 176\n",
      "00:03:05 [DEBUG] train episode 1531: reward = 144.00, steps = 144\n",
      "00:03:05 [DEBUG] train episode 1532: reward = 179.00, steps = 179\n",
      "00:03:05 [DEBUG] train episode 1533: reward = 200.00, steps = 200\n",
      "00:03:05 [DEBUG] train episode 1534: reward = 108.00, steps = 108\n",
      "00:03:05 [DEBUG] train episode 1535: reward = 167.00, steps = 167\n",
      "00:03:05 [DEBUG] train episode 1536: reward = 133.00, steps = 133\n",
      "00:03:05 [DEBUG] train episode 1537: reward = 144.00, steps = 144\n",
      "00:03:05 [DEBUG] train episode 1538: reward = 134.00, steps = 134\n",
      "00:03:05 [DEBUG] train episode 1539: reward = 128.00, steps = 128\n",
      "00:03:05 [DEBUG] train episode 1540: reward = 123.00, steps = 123\n",
      "00:03:05 [DEBUG] train episode 1541: reward = 198.00, steps = 198\n",
      "00:03:05 [DEBUG] train episode 1542: reward = 151.00, steps = 151\n",
      "00:03:05 [DEBUG] train episode 1543: reward = 130.00, steps = 130\n",
      "00:03:05 [DEBUG] train episode 1544: reward = 133.00, steps = 133\n",
      "00:03:05 [DEBUG] train episode 1545: reward = 157.00, steps = 157\n",
      "00:03:05 [DEBUG] train episode 1546: reward = 161.00, steps = 161\n",
      "00:03:05 [DEBUG] train episode 1547: reward = 124.00, steps = 124\n",
      "00:03:05 [DEBUG] train episode 1548: reward = 117.00, steps = 117\n",
      "00:03:05 [DEBUG] train episode 1549: reward = 176.00, steps = 176\n",
      "00:03:05 [DEBUG] train episode 1550: reward = 112.00, steps = 112\n",
      "00:03:06 [DEBUG] train episode 1551: reward = 200.00, steps = 200\n",
      "00:03:06 [DEBUG] train episode 1552: reward = 111.00, steps = 111\n",
      "00:03:06 [DEBUG] train episode 1553: reward = 118.00, steps = 118\n",
      "00:03:06 [DEBUG] train episode 1554: reward = 114.00, steps = 114\n",
      "00:03:06 [DEBUG] train episode 1555: reward = 150.00, steps = 150\n",
      "00:03:06 [DEBUG] train episode 1556: reward = 149.00, steps = 149\n",
      "00:03:06 [DEBUG] train episode 1557: reward = 117.00, steps = 117\n",
      "00:03:06 [DEBUG] train episode 1558: reward = 105.00, steps = 105\n",
      "00:03:06 [DEBUG] train episode 1559: reward = 159.00, steps = 159\n",
      "00:03:06 [DEBUG] train episode 1560: reward = 122.00, steps = 122\n",
      "00:03:06 [DEBUG] train episode 1561: reward = 112.00, steps = 112\n",
      "00:03:06 [DEBUG] train episode 1562: reward = 122.00, steps = 122\n",
      "00:03:06 [DEBUG] train episode 1563: reward = 98.00, steps = 98\n",
      "00:03:06 [DEBUG] train episode 1564: reward = 131.00, steps = 131\n",
      "00:03:06 [DEBUG] train episode 1565: reward = 100.00, steps = 100\n",
      "00:03:06 [DEBUG] train episode 1566: reward = 163.00, steps = 163\n",
      "00:03:06 [DEBUG] train episode 1567: reward = 120.00, steps = 120\n",
      "00:03:06 [DEBUG] train episode 1568: reward = 97.00, steps = 97\n",
      "00:03:06 [DEBUG] train episode 1569: reward = 146.00, steps = 146\n",
      "00:03:06 [DEBUG] train episode 1570: reward = 157.00, steps = 157\n",
      "00:03:06 [DEBUG] train episode 1571: reward = 200.00, steps = 200\n",
      "00:03:06 [DEBUG] train episode 1572: reward = 120.00, steps = 120\n",
      "00:03:06 [DEBUG] train episode 1573: reward = 135.00, steps = 135\n",
      "00:03:06 [DEBUG] train episode 1574: reward = 138.00, steps = 138\n",
      "00:03:06 [DEBUG] train episode 1575: reward = 118.00, steps = 118\n",
      "00:03:07 [DEBUG] train episode 1576: reward = 149.00, steps = 149\n",
      "00:03:07 [DEBUG] train episode 1577: reward = 100.00, steps = 100\n",
      "00:03:07 [DEBUG] train episode 1578: reward = 121.00, steps = 121\n",
      "00:03:07 [DEBUG] train episode 1579: reward = 86.00, steps = 86\n",
      "00:03:07 [DEBUG] train episode 1580: reward = 121.00, steps = 121\n",
      "00:03:07 [DEBUG] train episode 1581: reward = 114.00, steps = 114\n",
      "00:03:07 [DEBUG] train episode 1582: reward = 137.00, steps = 137\n",
      "00:03:07 [DEBUG] train episode 1583: reward = 172.00, steps = 172\n",
      "00:03:07 [DEBUG] train episode 1584: reward = 132.00, steps = 132\n",
      "00:03:07 [DEBUG] train episode 1585: reward = 158.00, steps = 158\n",
      "00:03:07 [DEBUG] train episode 1586: reward = 116.00, steps = 116\n",
      "00:03:07 [DEBUG] train episode 1587: reward = 170.00, steps = 170\n",
      "00:03:07 [DEBUG] train episode 1588: reward = 132.00, steps = 132\n",
      "00:03:07 [DEBUG] train episode 1589: reward = 176.00, steps = 176\n",
      "00:03:07 [DEBUG] train episode 1590: reward = 119.00, steps = 119\n",
      "00:03:07 [DEBUG] train episode 1591: reward = 130.00, steps = 130\n",
      "00:03:07 [DEBUG] train episode 1592: reward = 200.00, steps = 200\n",
      "00:03:07 [DEBUG] train episode 1593: reward = 105.00, steps = 105\n",
      "00:03:07 [DEBUG] train episode 1594: reward = 103.00, steps = 103\n",
      "00:03:07 [DEBUG] train episode 1595: reward = 186.00, steps = 186\n",
      "00:03:07 [DEBUG] train episode 1596: reward = 118.00, steps = 118\n",
      "00:03:07 [DEBUG] train episode 1597: reward = 147.00, steps = 147\n",
      "00:03:07 [DEBUG] train episode 1598: reward = 160.00, steps = 160\n",
      "00:03:08 [DEBUG] train episode 1599: reward = 142.00, steps = 142\n",
      "00:03:08 [DEBUG] train episode 1600: reward = 138.00, steps = 138\n",
      "00:03:08 [DEBUG] train episode 1601: reward = 112.00, steps = 112\n",
      "00:03:08 [DEBUG] train episode 1602: reward = 143.00, steps = 143\n",
      "00:03:08 [DEBUG] train episode 1603: reward = 200.00, steps = 200\n",
      "00:03:08 [DEBUG] train episode 1604: reward = 97.00, steps = 97\n",
      "00:03:08 [DEBUG] train episode 1605: reward = 198.00, steps = 198\n",
      "00:03:08 [DEBUG] train episode 1606: reward = 138.00, steps = 138\n",
      "00:03:08 [DEBUG] train episode 1607: reward = 199.00, steps = 199\n",
      "00:03:08 [DEBUG] train episode 1608: reward = 176.00, steps = 176\n",
      "00:03:08 [DEBUG] train episode 1609: reward = 120.00, steps = 120\n",
      "00:03:08 [DEBUG] train episode 1610: reward = 157.00, steps = 157\n",
      "00:03:08 [DEBUG] train episode 1611: reward = 167.00, steps = 167\n",
      "00:03:08 [DEBUG] train episode 1612: reward = 164.00, steps = 164\n",
      "00:03:08 [DEBUG] train episode 1613: reward = 168.00, steps = 168\n",
      "00:03:08 [DEBUG] train episode 1614: reward = 148.00, steps = 148\n",
      "00:03:08 [DEBUG] train episode 1615: reward = 145.00, steps = 145\n",
      "00:03:08 [DEBUG] train episode 1616: reward = 108.00, steps = 108\n",
      "00:03:08 [DEBUG] train episode 1617: reward = 135.00, steps = 135\n",
      "00:03:08 [DEBUG] train episode 1618: reward = 148.00, steps = 148\n",
      "00:03:08 [DEBUG] train episode 1619: reward = 143.00, steps = 143\n",
      "00:03:08 [DEBUG] train episode 1620: reward = 131.00, steps = 131\n",
      "00:03:08 [DEBUG] train episode 1621: reward = 127.00, steps = 127\n",
      "00:03:09 [DEBUG] train episode 1622: reward = 160.00, steps = 160\n",
      "00:03:09 [DEBUG] train episode 1623: reward = 125.00, steps = 125\n",
      "00:03:09 [DEBUG] train episode 1624: reward = 113.00, steps = 113\n",
      "00:03:09 [DEBUG] train episode 1625: reward = 109.00, steps = 109\n",
      "00:03:09 [DEBUG] train episode 1626: reward = 117.00, steps = 117\n",
      "00:03:09 [DEBUG] train episode 1627: reward = 125.00, steps = 125\n",
      "00:03:09 [DEBUG] train episode 1628: reward = 119.00, steps = 119\n",
      "00:03:09 [DEBUG] train episode 1629: reward = 90.00, steps = 90\n",
      "00:03:09 [DEBUG] train episode 1630: reward = 99.00, steps = 99\n",
      "00:03:09 [DEBUG] train episode 1631: reward = 102.00, steps = 102\n",
      "00:03:09 [DEBUG] train episode 1632: reward = 102.00, steps = 102\n",
      "00:03:09 [DEBUG] train episode 1633: reward = 154.00, steps = 154\n",
      "00:03:09 [DEBUG] train episode 1634: reward = 200.00, steps = 200\n",
      "00:03:09 [DEBUG] train episode 1635: reward = 145.00, steps = 145\n",
      "00:03:09 [DEBUG] train episode 1636: reward = 108.00, steps = 108\n",
      "00:03:09 [DEBUG] train episode 1637: reward = 104.00, steps = 104\n",
      "00:03:09 [DEBUG] train episode 1638: reward = 138.00, steps = 138\n",
      "00:03:09 [DEBUG] train episode 1639: reward = 152.00, steps = 152\n",
      "00:03:09 [DEBUG] train episode 1640: reward = 112.00, steps = 112\n",
      "00:03:09 [DEBUG] train episode 1641: reward = 105.00, steps = 105\n",
      "00:03:09 [DEBUG] train episode 1642: reward = 120.00, steps = 120\n",
      "00:03:09 [DEBUG] train episode 1643: reward = 200.00, steps = 200\n",
      "00:03:09 [DEBUG] train episode 1644: reward = 132.00, steps = 132\n",
      "00:03:09 [DEBUG] train episode 1645: reward = 141.00, steps = 141\n",
      "00:03:09 [DEBUG] train episode 1646: reward = 130.00, steps = 130\n",
      "00:03:10 [DEBUG] train episode 1647: reward = 112.00, steps = 112\n",
      "00:03:10 [DEBUG] train episode 1648: reward = 171.00, steps = 171\n",
      "00:03:10 [DEBUG] train episode 1649: reward = 130.00, steps = 130\n",
      "00:03:10 [DEBUG] train episode 1650: reward = 100.00, steps = 100\n",
      "00:03:10 [DEBUG] train episode 1651: reward = 128.00, steps = 128\n",
      "00:03:10 [DEBUG] train episode 1652: reward = 146.00, steps = 146\n",
      "00:03:10 [DEBUG] train episode 1653: reward = 121.00, steps = 121\n",
      "00:03:10 [DEBUG] train episode 1654: reward = 129.00, steps = 129\n",
      "00:03:10 [DEBUG] train episode 1655: reward = 130.00, steps = 130\n",
      "00:03:10 [DEBUG] train episode 1656: reward = 137.00, steps = 137\n",
      "00:03:10 [DEBUG] train episode 1657: reward = 132.00, steps = 132\n",
      "00:03:10 [DEBUG] train episode 1658: reward = 121.00, steps = 121\n",
      "00:03:10 [DEBUG] train episode 1659: reward = 160.00, steps = 160\n",
      "00:03:10 [DEBUG] train episode 1660: reward = 149.00, steps = 149\n",
      "00:03:10 [DEBUG] train episode 1661: reward = 143.00, steps = 143\n",
      "00:03:10 [DEBUG] train episode 1662: reward = 101.00, steps = 101\n",
      "00:03:10 [DEBUG] train episode 1663: reward = 136.00, steps = 136\n",
      "00:03:10 [DEBUG] train episode 1664: reward = 200.00, steps = 200\n",
      "00:03:10 [DEBUG] train episode 1665: reward = 150.00, steps = 150\n",
      "00:03:10 [DEBUG] train episode 1666: reward = 162.00, steps = 162\n",
      "00:03:10 [DEBUG] train episode 1667: reward = 143.00, steps = 143\n",
      "00:03:10 [DEBUG] train episode 1668: reward = 124.00, steps = 124\n",
      "00:03:10 [DEBUG] train episode 1669: reward = 114.00, steps = 114\n",
      "00:03:10 [DEBUG] train episode 1670: reward = 105.00, steps = 105\n",
      "00:03:11 [DEBUG] train episode 1671: reward = 147.00, steps = 147\n",
      "00:03:11 [DEBUG] train episode 1672: reward = 151.00, steps = 151\n",
      "00:03:11 [DEBUG] train episode 1673: reward = 158.00, steps = 158\n",
      "00:03:11 [DEBUG] train episode 1674: reward = 140.00, steps = 140\n",
      "00:03:11 [DEBUG] train episode 1675: reward = 116.00, steps = 116\n",
      "00:03:11 [DEBUG] train episode 1676: reward = 89.00, steps = 89\n",
      "00:03:11 [DEBUG] train episode 1677: reward = 136.00, steps = 136\n",
      "00:03:11 [DEBUG] train episode 1678: reward = 200.00, steps = 200\n",
      "00:03:11 [DEBUG] train episode 1679: reward = 125.00, steps = 125\n",
      "00:03:11 [DEBUG] train episode 1680: reward = 137.00, steps = 137\n",
      "00:03:11 [DEBUG] train episode 1681: reward = 181.00, steps = 181\n",
      "00:03:11 [DEBUG] train episode 1682: reward = 200.00, steps = 200\n",
      "00:03:11 [DEBUG] train episode 1683: reward = 200.00, steps = 200\n",
      "00:03:11 [DEBUG] train episode 1684: reward = 177.00, steps = 177\n",
      "00:03:11 [DEBUG] train episode 1685: reward = 138.00, steps = 138\n",
      "00:03:11 [DEBUG] train episode 1686: reward = 165.00, steps = 165\n",
      "00:03:11 [DEBUG] train episode 1687: reward = 113.00, steps = 113\n",
      "00:03:11 [DEBUG] train episode 1688: reward = 125.00, steps = 125\n",
      "00:03:11 [DEBUG] train episode 1689: reward = 200.00, steps = 200\n",
      "00:03:11 [DEBUG] train episode 1690: reward = 130.00, steps = 130\n",
      "00:03:11 [DEBUG] train episode 1691: reward = 166.00, steps = 166\n",
      "00:03:11 [DEBUG] train episode 1692: reward = 161.00, steps = 161\n",
      "00:03:12 [DEBUG] train episode 1693: reward = 193.00, steps = 193\n",
      "00:03:12 [DEBUG] train episode 1694: reward = 113.00, steps = 113\n",
      "00:03:12 [DEBUG] train episode 1695: reward = 104.00, steps = 104\n",
      "00:03:12 [DEBUG] train episode 1696: reward = 175.00, steps = 175\n",
      "00:03:12 [DEBUG] train episode 1697: reward = 135.00, steps = 135\n",
      "00:03:12 [DEBUG] train episode 1698: reward = 117.00, steps = 117\n",
      "00:03:12 [DEBUG] train episode 1699: reward = 158.00, steps = 158\n",
      "00:03:12 [DEBUG] train episode 1700: reward = 147.00, steps = 147\n",
      "00:03:12 [DEBUG] train episode 1701: reward = 122.00, steps = 122\n",
      "00:03:12 [DEBUG] train episode 1702: reward = 200.00, steps = 200\n",
      "00:03:12 [DEBUG] train episode 1703: reward = 132.00, steps = 132\n",
      "00:03:12 [DEBUG] train episode 1704: reward = 193.00, steps = 193\n",
      "00:03:12 [DEBUG] train episode 1705: reward = 198.00, steps = 198\n",
      "00:03:12 [DEBUG] train episode 1706: reward = 192.00, steps = 192\n",
      "00:03:12 [DEBUG] train episode 1707: reward = 200.00, steps = 200\n",
      "00:03:12 [DEBUG] train episode 1708: reward = 129.00, steps = 129\n",
      "00:03:12 [DEBUG] train episode 1709: reward = 144.00, steps = 144\n",
      "00:03:12 [DEBUG] train episode 1710: reward = 138.00, steps = 138\n",
      "00:03:12 [DEBUG] train episode 1711: reward = 149.00, steps = 149\n",
      "00:03:12 [DEBUG] train episode 1712: reward = 111.00, steps = 111\n",
      "00:03:12 [DEBUG] train episode 1713: reward = 117.00, steps = 117\n",
      "00:03:12 [DEBUG] train episode 1714: reward = 134.00, steps = 134\n",
      "00:03:13 [DEBUG] train episode 1715: reward = 136.00, steps = 136\n",
      "00:03:13 [DEBUG] train episode 1716: reward = 174.00, steps = 174\n",
      "00:03:13 [DEBUG] train episode 1717: reward = 165.00, steps = 165\n",
      "00:03:13 [DEBUG] train episode 1718: reward = 113.00, steps = 113\n",
      "00:03:13 [DEBUG] train episode 1719: reward = 110.00, steps = 110\n",
      "00:03:13 [DEBUG] train episode 1720: reward = 184.00, steps = 184\n",
      "00:03:13 [DEBUG] train episode 1721: reward = 133.00, steps = 133\n",
      "00:03:13 [DEBUG] train episode 1722: reward = 194.00, steps = 194\n",
      "00:03:13 [DEBUG] train episode 1723: reward = 139.00, steps = 139\n",
      "00:03:13 [DEBUG] train episode 1724: reward = 139.00, steps = 139\n",
      "00:03:13 [DEBUG] train episode 1725: reward = 127.00, steps = 127\n",
      "00:03:13 [DEBUG] train episode 1726: reward = 135.00, steps = 135\n",
      "00:03:13 [DEBUG] train episode 1727: reward = 153.00, steps = 153\n",
      "00:03:13 [DEBUG] train episode 1728: reward = 112.00, steps = 112\n",
      "00:03:13 [DEBUG] train episode 1729: reward = 200.00, steps = 200\n",
      "00:03:13 [DEBUG] train episode 1730: reward = 134.00, steps = 134\n",
      "00:03:13 [DEBUG] train episode 1731: reward = 146.00, steps = 146\n",
      "00:03:13 [DEBUG] train episode 1732: reward = 200.00, steps = 200\n",
      "00:03:13 [DEBUG] train episode 1733: reward = 139.00, steps = 139\n",
      "00:03:13 [DEBUG] train episode 1734: reward = 125.00, steps = 125\n",
      "00:03:14 [DEBUG] train episode 1735: reward = 163.00, steps = 163\n",
      "00:03:14 [DEBUG] train episode 1736: reward = 120.00, steps = 120\n",
      "00:03:14 [DEBUG] train episode 1737: reward = 163.00, steps = 163\n",
      "00:03:14 [DEBUG] train episode 1738: reward = 135.00, steps = 135\n",
      "00:03:14 [DEBUG] train episode 1739: reward = 103.00, steps = 103\n",
      "00:03:14 [DEBUG] train episode 1740: reward = 108.00, steps = 108\n",
      "00:03:14 [DEBUG] train episode 1741: reward = 200.00, steps = 200\n",
      "00:03:14 [DEBUG] train episode 1742: reward = 178.00, steps = 178\n",
      "00:03:14 [DEBUG] train episode 1743: reward = 133.00, steps = 133\n",
      "00:03:14 [DEBUG] train episode 1744: reward = 147.00, steps = 147\n",
      "00:03:14 [DEBUG] train episode 1745: reward = 148.00, steps = 148\n",
      "00:03:14 [DEBUG] train episode 1746: reward = 119.00, steps = 119\n",
      "00:03:14 [DEBUG] train episode 1747: reward = 129.00, steps = 129\n",
      "00:03:14 [DEBUG] train episode 1748: reward = 160.00, steps = 160\n",
      "00:03:14 [DEBUG] train episode 1749: reward = 168.00, steps = 168\n",
      "00:03:14 [DEBUG] train episode 1750: reward = 112.00, steps = 112\n",
      "00:03:14 [DEBUG] train episode 1751: reward = 200.00, steps = 200\n",
      "00:03:14 [DEBUG] train episode 1752: reward = 200.00, steps = 200\n",
      "00:03:14 [DEBUG] train episode 1753: reward = 200.00, steps = 200\n",
      "00:03:14 [DEBUG] train episode 1754: reward = 200.00, steps = 200\n",
      "00:03:14 [DEBUG] train episode 1755: reward = 148.00, steps = 148\n",
      "00:03:14 [DEBUG] train episode 1756: reward = 156.00, steps = 156\n",
      "00:03:15 [DEBUG] train episode 1757: reward = 125.00, steps = 125\n",
      "00:03:15 [DEBUG] train episode 1758: reward = 149.00, steps = 149\n",
      "00:03:15 [DEBUG] train episode 1759: reward = 123.00, steps = 123\n",
      "00:03:15 [DEBUG] train episode 1760: reward = 140.00, steps = 140\n",
      "00:03:15 [DEBUG] train episode 1761: reward = 116.00, steps = 116\n",
      "00:03:15 [DEBUG] train episode 1762: reward = 130.00, steps = 130\n",
      "00:03:15 [DEBUG] train episode 1763: reward = 172.00, steps = 172\n",
      "00:03:15 [DEBUG] train episode 1764: reward = 180.00, steps = 180\n",
      "00:03:15 [DEBUG] train episode 1765: reward = 114.00, steps = 114\n",
      "00:03:15 [DEBUG] train episode 1766: reward = 174.00, steps = 174\n",
      "00:03:15 [DEBUG] train episode 1767: reward = 181.00, steps = 181\n",
      "00:03:15 [DEBUG] train episode 1768: reward = 103.00, steps = 103\n",
      "00:03:15 [DEBUG] train episode 1769: reward = 200.00, steps = 200\n",
      "00:03:15 [DEBUG] train episode 1770: reward = 200.00, steps = 200\n",
      "00:03:15 [DEBUG] train episode 1771: reward = 164.00, steps = 164\n",
      "00:03:15 [DEBUG] train episode 1772: reward = 117.00, steps = 117\n",
      "00:03:15 [DEBUG] train episode 1773: reward = 182.00, steps = 182\n",
      "00:03:15 [DEBUG] train episode 1774: reward = 200.00, steps = 200\n",
      "00:03:15 [DEBUG] train episode 1775: reward = 189.00, steps = 189\n",
      "00:03:15 [DEBUG] train episode 1776: reward = 119.00, steps = 119\n",
      "00:03:15 [DEBUG] train episode 1777: reward = 169.00, steps = 169\n",
      "00:03:16 [DEBUG] train episode 1778: reward = 157.00, steps = 157\n",
      "00:03:16 [DEBUG] train episode 1779: reward = 140.00, steps = 140\n",
      "00:03:16 [DEBUG] train episode 1780: reward = 172.00, steps = 172\n",
      "00:03:16 [DEBUG] train episode 1781: reward = 200.00, steps = 200\n",
      "00:03:16 [DEBUG] train episode 1782: reward = 172.00, steps = 172\n",
      "00:03:16 [DEBUG] train episode 1783: reward = 137.00, steps = 137\n",
      "00:03:16 [DEBUG] train episode 1784: reward = 200.00, steps = 200\n",
      "00:03:16 [DEBUG] train episode 1785: reward = 140.00, steps = 140\n",
      "00:03:16 [DEBUG] train episode 1786: reward = 151.00, steps = 151\n",
      "00:03:16 [DEBUG] train episode 1787: reward = 164.00, steps = 164\n",
      "00:03:16 [DEBUG] train episode 1788: reward = 173.00, steps = 173\n",
      "00:03:16 [DEBUG] train episode 1789: reward = 187.00, steps = 187\n",
      "00:03:16 [DEBUG] train episode 1790: reward = 200.00, steps = 200\n",
      "00:03:16 [DEBUG] train episode 1791: reward = 200.00, steps = 200\n",
      "00:03:16 [DEBUG] train episode 1792: reward = 191.00, steps = 191\n",
      "00:03:16 [DEBUG] train episode 1793: reward = 200.00, steps = 200\n",
      "00:03:16 [DEBUG] train episode 1794: reward = 200.00, steps = 200\n",
      "00:03:16 [DEBUG] train episode 1795: reward = 173.00, steps = 173\n",
      "00:03:16 [DEBUG] train episode 1796: reward = 200.00, steps = 200\n",
      "00:03:17 [DEBUG] train episode 1797: reward = 200.00, steps = 200\n",
      "00:03:17 [DEBUG] train episode 1798: reward = 178.00, steps = 178\n",
      "00:03:17 [DEBUG] train episode 1799: reward = 138.00, steps = 138\n",
      "00:03:17 [DEBUG] train episode 1800: reward = 144.00, steps = 144\n",
      "00:03:17 [DEBUG] train episode 1801: reward = 187.00, steps = 187\n",
      "00:03:17 [DEBUG] train episode 1802: reward = 125.00, steps = 125\n",
      "00:03:17 [DEBUG] train episode 1803: reward = 200.00, steps = 200\n",
      "00:03:17 [DEBUG] train episode 1804: reward = 178.00, steps = 178\n",
      "00:03:17 [DEBUG] train episode 1805: reward = 164.00, steps = 164\n",
      "00:03:17 [DEBUG] train episode 1806: reward = 194.00, steps = 194\n",
      "00:03:17 [DEBUG] train episode 1807: reward = 151.00, steps = 151\n",
      "00:03:17 [DEBUG] train episode 1808: reward = 200.00, steps = 200\n",
      "00:03:17 [DEBUG] train episode 1809: reward = 189.00, steps = 189\n",
      "00:03:17 [DEBUG] train episode 1810: reward = 200.00, steps = 200\n",
      "00:03:17 [DEBUG] train episode 1811: reward = 167.00, steps = 167\n",
      "00:03:17 [DEBUG] train episode 1812: reward = 154.00, steps = 154\n",
      "00:03:17 [DEBUG] train episode 1813: reward = 125.00, steps = 125\n",
      "00:03:17 [DEBUG] train episode 1814: reward = 200.00, steps = 200\n",
      "00:03:18 [DEBUG] train episode 1815: reward = 200.00, steps = 200\n",
      "00:03:18 [DEBUG] train episode 1816: reward = 113.00, steps = 113\n",
      "00:03:18 [DEBUG] train episode 1817: reward = 188.00, steps = 188\n",
      "00:03:18 [DEBUG] train episode 1818: reward = 114.00, steps = 114\n",
      "00:03:18 [DEBUG] train episode 1819: reward = 200.00, steps = 200\n",
      "00:03:18 [DEBUG] train episode 1820: reward = 172.00, steps = 172\n",
      "00:03:18 [DEBUG] train episode 1821: reward = 184.00, steps = 184\n",
      "00:03:18 [DEBUG] train episode 1822: reward = 200.00, steps = 200\n",
      "00:03:18 [DEBUG] train episode 1823: reward = 174.00, steps = 174\n",
      "00:03:18 [DEBUG] train episode 1824: reward = 132.00, steps = 132\n",
      "00:03:18 [DEBUG] train episode 1825: reward = 156.00, steps = 156\n",
      "00:03:18 [DEBUG] train episode 1826: reward = 182.00, steps = 182\n",
      "00:03:18 [DEBUG] train episode 1827: reward = 144.00, steps = 144\n",
      "00:03:18 [DEBUG] train episode 1828: reward = 165.00, steps = 165\n",
      "00:03:18 [DEBUG] train episode 1829: reward = 127.00, steps = 127\n",
      "00:03:18 [DEBUG] train episode 1830: reward = 105.00, steps = 105\n",
      "00:03:18 [DEBUG] train episode 1831: reward = 200.00, steps = 200\n",
      "00:03:18 [DEBUG] train episode 1832: reward = 179.00, steps = 179\n",
      "00:03:18 [DEBUG] train episode 1833: reward = 200.00, steps = 200\n",
      "00:03:18 [DEBUG] train episode 1834: reward = 155.00, steps = 155\n",
      "00:03:19 [DEBUG] train episode 1835: reward = 200.00, steps = 200\n",
      "00:03:19 [DEBUG] train episode 1836: reward = 158.00, steps = 158\n",
      "00:03:19 [DEBUG] train episode 1837: reward = 200.00, steps = 200\n",
      "00:03:19 [DEBUG] train episode 1838: reward = 146.00, steps = 146\n",
      "00:03:19 [DEBUG] train episode 1839: reward = 125.00, steps = 125\n",
      "00:03:19 [DEBUG] train episode 1840: reward = 125.00, steps = 125\n",
      "00:03:19 [DEBUG] train episode 1841: reward = 164.00, steps = 164\n",
      "00:03:19 [DEBUG] train episode 1842: reward = 200.00, steps = 200\n",
      "00:03:19 [DEBUG] train episode 1843: reward = 200.00, steps = 200\n",
      "00:03:19 [DEBUG] train episode 1844: reward = 166.00, steps = 166\n",
      "00:03:19 [DEBUG] train episode 1845: reward = 200.00, steps = 200\n",
      "00:03:19 [DEBUG] train episode 1846: reward = 158.00, steps = 158\n",
      "00:03:19 [DEBUG] train episode 1847: reward = 129.00, steps = 129\n",
      "00:03:19 [DEBUG] train episode 1848: reward = 200.00, steps = 200\n",
      "00:03:19 [DEBUG] train episode 1849: reward = 136.00, steps = 136\n",
      "00:03:19 [DEBUG] train episode 1850: reward = 151.00, steps = 151\n",
      "00:03:19 [DEBUG] train episode 1851: reward = 117.00, steps = 117\n",
      "00:03:19 [DEBUG] train episode 1852: reward = 200.00, steps = 200\n",
      "00:03:19 [DEBUG] train episode 1853: reward = 152.00, steps = 152\n",
      "00:03:19 [DEBUG] train episode 1854: reward = 114.00, steps = 114\n",
      "00:03:19 [DEBUG] train episode 1855: reward = 156.00, steps = 156\n",
      "00:03:20 [DEBUG] train episode 1856: reward = 161.00, steps = 161\n",
      "00:03:20 [DEBUG] train episode 1857: reward = 128.00, steps = 128\n",
      "00:03:20 [DEBUG] train episode 1858: reward = 121.00, steps = 121\n",
      "00:03:20 [DEBUG] train episode 1859: reward = 156.00, steps = 156\n",
      "00:03:20 [DEBUG] train episode 1860: reward = 149.00, steps = 149\n",
      "00:03:20 [DEBUG] train episode 1861: reward = 164.00, steps = 164\n",
      "00:03:20 [DEBUG] train episode 1862: reward = 162.00, steps = 162\n",
      "00:03:20 [DEBUG] train episode 1863: reward = 151.00, steps = 151\n",
      "00:03:20 [DEBUG] train episode 1864: reward = 114.00, steps = 114\n",
      "00:03:20 [DEBUG] train episode 1865: reward = 170.00, steps = 170\n",
      "00:03:20 [DEBUG] train episode 1866: reward = 159.00, steps = 159\n",
      "00:03:20 [DEBUG] train episode 1867: reward = 148.00, steps = 148\n",
      "00:03:20 [DEBUG] train episode 1868: reward = 200.00, steps = 200\n",
      "00:03:20 [DEBUG] train episode 1869: reward = 200.00, steps = 200\n",
      "00:03:20 [DEBUG] train episode 1870: reward = 161.00, steps = 161\n",
      "00:03:20 [DEBUG] train episode 1871: reward = 151.00, steps = 151\n",
      "00:03:20 [DEBUG] train episode 1872: reward = 200.00, steps = 200\n",
      "00:03:20 [DEBUG] train episode 1873: reward = 119.00, steps = 119\n",
      "00:03:20 [DEBUG] train episode 1874: reward = 200.00, steps = 200\n",
      "00:03:20 [DEBUG] train episode 1875: reward = 172.00, steps = 172\n",
      "00:03:21 [DEBUG] train episode 1876: reward = 158.00, steps = 158\n",
      "00:03:21 [DEBUG] train episode 1877: reward = 200.00, steps = 200\n",
      "00:03:21 [DEBUG] train episode 1878: reward = 191.00, steps = 191\n",
      "00:03:21 [DEBUG] train episode 1879: reward = 134.00, steps = 134\n",
      "00:03:21 [DEBUG] train episode 1880: reward = 149.00, steps = 149\n",
      "00:03:21 [DEBUG] train episode 1881: reward = 188.00, steps = 188\n",
      "00:03:21 [DEBUG] train episode 1882: reward = 157.00, steps = 157\n",
      "00:03:21 [DEBUG] train episode 1883: reward = 194.00, steps = 194\n",
      "00:03:21 [DEBUG] train episode 1884: reward = 194.00, steps = 194\n",
      "00:03:21 [DEBUG] train episode 1885: reward = 194.00, steps = 194\n",
      "00:03:21 [DEBUG] train episode 1886: reward = 113.00, steps = 113\n",
      "00:03:21 [DEBUG] train episode 1887: reward = 172.00, steps = 172\n",
      "00:03:21 [DEBUG] train episode 1888: reward = 165.00, steps = 165\n",
      "00:03:21 [DEBUG] train episode 1889: reward = 148.00, steps = 148\n",
      "00:03:21 [DEBUG] train episode 1890: reward = 179.00, steps = 179\n",
      "00:03:21 [DEBUG] train episode 1891: reward = 159.00, steps = 159\n",
      "00:03:21 [DEBUG] train episode 1892: reward = 138.00, steps = 138\n",
      "00:03:21 [DEBUG] train episode 1893: reward = 134.00, steps = 134\n",
      "00:03:21 [DEBUG] train episode 1894: reward = 138.00, steps = 138\n",
      "00:03:21 [DEBUG] train episode 1895: reward = 200.00, steps = 200\n",
      "00:03:22 [DEBUG] train episode 1896: reward = 162.00, steps = 162\n",
      "00:03:22 [DEBUG] train episode 1897: reward = 174.00, steps = 174\n",
      "00:03:22 [DEBUG] train episode 1898: reward = 160.00, steps = 160\n",
      "00:03:22 [DEBUG] train episode 1899: reward = 136.00, steps = 136\n",
      "00:03:22 [DEBUG] train episode 1900: reward = 121.00, steps = 121\n",
      "00:03:22 [DEBUG] train episode 1901: reward = 200.00, steps = 200\n",
      "00:03:22 [DEBUG] train episode 1902: reward = 200.00, steps = 200\n",
      "00:03:22 [DEBUG] train episode 1903: reward = 141.00, steps = 141\n",
      "00:03:22 [DEBUG] train episode 1904: reward = 161.00, steps = 161\n",
      "00:03:22 [DEBUG] train episode 1905: reward = 148.00, steps = 148\n",
      "00:03:22 [DEBUG] train episode 1906: reward = 149.00, steps = 149\n",
      "00:03:22 [DEBUG] train episode 1907: reward = 130.00, steps = 130\n",
      "00:03:22 [DEBUG] train episode 1908: reward = 151.00, steps = 151\n",
      "00:03:22 [DEBUG] train episode 1909: reward = 125.00, steps = 125\n",
      "00:03:22 [DEBUG] train episode 1910: reward = 171.00, steps = 171\n",
      "00:03:22 [DEBUG] train episode 1911: reward = 123.00, steps = 123\n",
      "00:03:22 [DEBUG] train episode 1912: reward = 181.00, steps = 181\n",
      "00:03:22 [DEBUG] train episode 1913: reward = 167.00, steps = 167\n",
      "00:03:22 [DEBUG] train episode 1914: reward = 200.00, steps = 200\n",
      "00:03:22 [DEBUG] train episode 1915: reward = 200.00, steps = 200\n",
      "00:03:22 [DEBUG] train episode 1916: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1917: reward = 163.00, steps = 163\n",
      "00:03:23 [DEBUG] train episode 1918: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1919: reward = 170.00, steps = 170\n",
      "00:03:23 [DEBUG] train episode 1920: reward = 186.00, steps = 186\n",
      "00:03:23 [DEBUG] train episode 1921: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1922: reward = 163.00, steps = 163\n",
      "00:03:23 [DEBUG] train episode 1923: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1924: reward = 165.00, steps = 165\n",
      "00:03:23 [DEBUG] train episode 1925: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1926: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1927: reward = 172.00, steps = 172\n",
      "00:03:23 [DEBUG] train episode 1928: reward = 193.00, steps = 193\n",
      "00:03:23 [DEBUG] train episode 1929: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1930: reward = 180.00, steps = 180\n",
      "00:03:23 [DEBUG] train episode 1931: reward = 200.00, steps = 200\n",
      "00:03:23 [DEBUG] train episode 1932: reward = 163.00, steps = 163\n",
      "00:03:23 [DEBUG] train episode 1933: reward = 140.00, steps = 140\n",
      "00:03:23 [DEBUG] train episode 1934: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1935: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1936: reward = 178.00, steps = 178\n",
      "00:03:24 [DEBUG] train episode 1937: reward = 163.00, steps = 163\n",
      "00:03:24 [DEBUG] train episode 1938: reward = 175.00, steps = 175\n",
      "00:03:24 [DEBUG] train episode 1939: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1940: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1941: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1942: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1943: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1944: reward = 153.00, steps = 153\n",
      "00:03:24 [DEBUG] train episode 1945: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1946: reward = 132.00, steps = 132\n",
      "00:03:24 [DEBUG] train episode 1947: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1948: reward = 200.00, steps = 200\n",
      "00:03:24 [DEBUG] train episode 1949: reward = 159.00, steps = 159\n",
      "00:03:24 [DEBUG] train episode 1950: reward = 138.00, steps = 138\n",
      "00:03:24 [DEBUG] train episode 1951: reward = 171.00, steps = 171\n",
      "00:03:24 [DEBUG] train episode 1952: reward = 124.00, steps = 124\n",
      "00:03:24 [DEBUG] train episode 1953: reward = 149.00, steps = 149\n",
      "00:03:24 [DEBUG] train episode 1954: reward = 127.00, steps = 127\n",
      "00:03:25 [DEBUG] train episode 1955: reward = 151.00, steps = 151\n",
      "00:03:25 [DEBUG] train episode 1956: reward = 146.00, steps = 146\n",
      "00:03:25 [DEBUG] train episode 1957: reward = 142.00, steps = 142\n",
      "00:03:25 [DEBUG] train episode 1958: reward = 146.00, steps = 146\n",
      "00:03:25 [DEBUG] train episode 1959: reward = 181.00, steps = 181\n",
      "00:03:25 [DEBUG] train episode 1960: reward = 167.00, steps = 167\n",
      "00:03:25 [DEBUG] train episode 1961: reward = 200.00, steps = 200\n",
      "00:03:25 [DEBUG] train episode 1962: reward = 184.00, steps = 184\n",
      "00:03:25 [DEBUG] train episode 1963: reward = 98.00, steps = 98\n",
      "00:03:25 [DEBUG] train episode 1964: reward = 136.00, steps = 136\n",
      "00:03:25 [DEBUG] train episode 1965: reward = 125.00, steps = 125\n",
      "00:03:25 [DEBUG] train episode 1966: reward = 165.00, steps = 165\n",
      "00:03:25 [DEBUG] train episode 1967: reward = 200.00, steps = 200\n",
      "00:03:25 [DEBUG] train episode 1968: reward = 190.00, steps = 190\n",
      "00:03:25 [DEBUG] train episode 1969: reward = 179.00, steps = 179\n",
      "00:03:25 [DEBUG] train episode 1970: reward = 134.00, steps = 134\n",
      "00:03:25 [DEBUG] train episode 1971: reward = 200.00, steps = 200\n",
      "00:03:25 [DEBUG] train episode 1972: reward = 200.00, steps = 200\n",
      "00:03:25 [DEBUG] train episode 1973: reward = 160.00, steps = 160\n",
      "00:03:26 [DEBUG] train episode 1974: reward = 182.00, steps = 182\n",
      "00:03:26 [DEBUG] train episode 1975: reward = 174.00, steps = 174\n",
      "00:03:26 [DEBUG] train episode 1976: reward = 136.00, steps = 136\n",
      "00:03:26 [DEBUG] train episode 1977: reward = 200.00, steps = 200\n",
      "00:03:26 [DEBUG] train episode 1978: reward = 119.00, steps = 119\n",
      "00:03:26 [DEBUG] train episode 1979: reward = 162.00, steps = 162\n",
      "00:03:26 [DEBUG] train episode 1980: reward = 135.00, steps = 135\n",
      "00:03:26 [DEBUG] train episode 1981: reward = 117.00, steps = 117\n",
      "00:03:26 [DEBUG] train episode 1982: reward = 145.00, steps = 145\n",
      "00:03:26 [DEBUG] train episode 1983: reward = 200.00, steps = 200\n",
      "00:03:26 [DEBUG] train episode 1984: reward = 200.00, steps = 200\n",
      "00:03:26 [DEBUG] train episode 1985: reward = 153.00, steps = 153\n",
      "00:03:26 [DEBUG] train episode 1986: reward = 200.00, steps = 200\n",
      "00:03:26 [DEBUG] train episode 1987: reward = 200.00, steps = 200\n",
      "00:03:26 [DEBUG] train episode 1988: reward = 137.00, steps = 137\n",
      "00:03:26 [DEBUG] train episode 1989: reward = 200.00, steps = 200\n",
      "00:03:26 [DEBUG] train episode 1990: reward = 190.00, steps = 190\n",
      "00:03:26 [DEBUG] train episode 1991: reward = 122.00, steps = 122\n",
      "00:03:26 [DEBUG] train episode 1992: reward = 119.00, steps = 119\n",
      "00:03:26 [DEBUG] train episode 1993: reward = 171.00, steps = 171\n",
      "00:03:26 [DEBUG] train episode 1994: reward = 125.00, steps = 125\n",
      "00:03:26 [DEBUG] train episode 1995: reward = 135.00, steps = 135\n",
      "00:03:27 [DEBUG] train episode 1996: reward = 133.00, steps = 133\n",
      "00:03:27 [DEBUG] train episode 1997: reward = 174.00, steps = 174\n",
      "00:03:27 [DEBUG] train episode 1998: reward = 137.00, steps = 137\n",
      "00:03:27 [DEBUG] train episode 1999: reward = 191.00, steps = 191\n",
      "00:03:27 [DEBUG] train episode 2000: reward = 178.00, steps = 178\n",
      "00:03:27 [DEBUG] train episode 2001: reward = 185.00, steps = 185\n",
      "00:03:27 [DEBUG] train episode 2002: reward = 144.00, steps = 144\n",
      "00:03:27 [DEBUG] train episode 2003: reward = 132.00, steps = 132\n",
      "00:03:27 [DEBUG] train episode 2004: reward = 200.00, steps = 200\n",
      "00:03:27 [DEBUG] train episode 2005: reward = 200.00, steps = 200\n",
      "00:03:27 [DEBUG] train episode 2006: reward = 200.00, steps = 200\n",
      "00:03:27 [DEBUG] train episode 2007: reward = 188.00, steps = 188\n",
      "00:03:27 [DEBUG] train episode 2008: reward = 140.00, steps = 140\n",
      "00:03:27 [DEBUG] train episode 2009: reward = 195.00, steps = 195\n",
      "00:03:27 [DEBUG] train episode 2010: reward = 119.00, steps = 119\n",
      "00:03:27 [DEBUG] train episode 2011: reward = 140.00, steps = 140\n",
      "00:03:27 [DEBUG] train episode 2012: reward = 168.00, steps = 168\n",
      "00:03:27 [DEBUG] train episode 2013: reward = 168.00, steps = 168\n",
      "00:03:27 [DEBUG] train episode 2014: reward = 173.00, steps = 173\n",
      "00:03:27 [DEBUG] train episode 2015: reward = 174.00, steps = 174\n",
      "00:03:28 [DEBUG] train episode 2016: reward = 122.00, steps = 122\n",
      "00:03:28 [DEBUG] train episode 2017: reward = 141.00, steps = 141\n",
      "00:03:28 [DEBUG] train episode 2018: reward = 200.00, steps = 200\n",
      "00:03:28 [DEBUG] train episode 2019: reward = 198.00, steps = 198\n",
      "00:03:28 [DEBUG] train episode 2020: reward = 200.00, steps = 200\n",
      "00:03:28 [DEBUG] train episode 2021: reward = 170.00, steps = 170\n",
      "00:03:28 [DEBUG] train episode 2022: reward = 159.00, steps = 159\n",
      "00:03:28 [DEBUG] train episode 2023: reward = 200.00, steps = 200\n",
      "00:03:28 [DEBUG] train episode 2024: reward = 200.00, steps = 200\n",
      "00:03:28 [DEBUG] train episode 2025: reward = 199.00, steps = 199\n",
      "00:03:28 [DEBUG] train episode 2026: reward = 124.00, steps = 124\n",
      "00:03:28 [DEBUG] train episode 2027: reward = 200.00, steps = 200\n",
      "00:03:28 [DEBUG] train episode 2028: reward = 129.00, steps = 129\n",
      "00:03:28 [DEBUG] train episode 2029: reward = 170.00, steps = 170\n",
      "00:03:28 [DEBUG] train episode 2030: reward = 139.00, steps = 139\n",
      "00:03:28 [DEBUG] train episode 2031: reward = 200.00, steps = 200\n",
      "00:03:28 [DEBUG] train episode 2032: reward = 140.00, steps = 140\n",
      "00:03:28 [DEBUG] train episode 2033: reward = 149.00, steps = 149\n",
      "00:03:28 [DEBUG] train episode 2034: reward = 146.00, steps = 146\n",
      "00:03:28 [DEBUG] train episode 2035: reward = 165.00, steps = 165\n",
      "00:03:29 [DEBUG] train episode 2036: reward = 175.00, steps = 175\n",
      "00:03:29 [DEBUG] train episode 2037: reward = 132.00, steps = 132\n",
      "00:03:29 [DEBUG] train episode 2038: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2039: reward = 133.00, steps = 133\n",
      "00:03:29 [DEBUG] train episode 2040: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2041: reward = 133.00, steps = 133\n",
      "00:03:29 [DEBUG] train episode 2042: reward = 140.00, steps = 140\n",
      "00:03:29 [DEBUG] train episode 2043: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2044: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2045: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2046: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2047: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2048: reward = 179.00, steps = 179\n",
      "00:03:29 [DEBUG] train episode 2049: reward = 156.00, steps = 156\n",
      "00:03:29 [DEBUG] train episode 2050: reward = 181.00, steps = 181\n",
      "00:03:29 [DEBUG] train episode 2051: reward = 147.00, steps = 147\n",
      "00:03:29 [DEBUG] train episode 2052: reward = 200.00, steps = 200\n",
      "00:03:29 [DEBUG] train episode 2053: reward = 155.00, steps = 155\n",
      "00:03:30 [DEBUG] train episode 2054: reward = 200.00, steps = 200\n",
      "00:03:30 [DEBUG] train episode 2055: reward = 159.00, steps = 159\n",
      "00:03:30 [DEBUG] train episode 2056: reward = 172.00, steps = 172\n",
      "00:03:30 [DEBUG] train episode 2057: reward = 137.00, steps = 137\n",
      "00:03:30 [DEBUG] train episode 2058: reward = 134.00, steps = 134\n",
      "00:03:30 [DEBUG] train episode 2059: reward = 134.00, steps = 134\n",
      "00:03:30 [DEBUG] train episode 2060: reward = 142.00, steps = 142\n",
      "00:03:30 [DEBUG] train episode 2061: reward = 126.00, steps = 126\n",
      "00:03:30 [DEBUG] train episode 2062: reward = 122.00, steps = 122\n",
      "00:03:30 [DEBUG] train episode 2063: reward = 191.00, steps = 191\n",
      "00:03:30 [DEBUG] train episode 2064: reward = 149.00, steps = 149\n",
      "00:03:30 [DEBUG] train episode 2065: reward = 129.00, steps = 129\n",
      "00:03:30 [DEBUG] train episode 2066: reward = 200.00, steps = 200\n",
      "00:03:30 [DEBUG] train episode 2067: reward = 154.00, steps = 154\n",
      "00:03:30 [DEBUG] train episode 2068: reward = 200.00, steps = 200\n",
      "00:03:30 [DEBUG] train episode 2069: reward = 182.00, steps = 182\n",
      "00:03:30 [DEBUG] train episode 2070: reward = 190.00, steps = 190\n",
      "00:03:30 [DEBUG] train episode 2071: reward = 190.00, steps = 190\n",
      "00:03:30 [DEBUG] train episode 2072: reward = 132.00, steps = 132\n",
      "00:03:30 [DEBUG] train episode 2073: reward = 155.00, steps = 155\n",
      "00:03:30 [DEBUG] train episode 2074: reward = 200.00, steps = 200\n",
      "00:03:31 [DEBUG] train episode 2075: reward = 167.00, steps = 167\n",
      "00:03:31 [DEBUG] train episode 2076: reward = 135.00, steps = 135\n",
      "00:03:31 [DEBUG] train episode 2077: reward = 187.00, steps = 187\n",
      "00:03:31 [DEBUG] train episode 2078: reward = 120.00, steps = 120\n",
      "00:03:31 [DEBUG] train episode 2079: reward = 188.00, steps = 188\n",
      "00:03:31 [DEBUG] train episode 2080: reward = 119.00, steps = 119\n",
      "00:03:31 [DEBUG] train episode 2081: reward = 144.00, steps = 144\n",
      "00:03:31 [DEBUG] train episode 2082: reward = 156.00, steps = 156\n",
      "00:03:31 [DEBUG] train episode 2083: reward = 122.00, steps = 122\n",
      "00:03:31 [DEBUG] train episode 2084: reward = 134.00, steps = 134\n",
      "00:03:31 [DEBUG] train episode 2085: reward = 123.00, steps = 123\n",
      "00:03:31 [DEBUG] train episode 2086: reward = 120.00, steps = 120\n",
      "00:03:31 [DEBUG] train episode 2087: reward = 169.00, steps = 169\n",
      "00:03:31 [DEBUG] train episode 2088: reward = 151.00, steps = 151\n",
      "00:03:31 [DEBUG] train episode 2089: reward = 132.00, steps = 132\n",
      "00:03:31 [DEBUG] train episode 2090: reward = 114.00, steps = 114\n",
      "00:03:31 [DEBUG] train episode 2091: reward = 193.00, steps = 193\n",
      "00:03:31 [DEBUG] train episode 2092: reward = 135.00, steps = 135\n",
      "00:03:31 [DEBUG] train episode 2093: reward = 163.00, steps = 163\n",
      "00:03:31 [DEBUG] train episode 2094: reward = 200.00, steps = 200\n",
      "00:03:31 [DEBUG] train episode 2095: reward = 200.00, steps = 200\n",
      "00:03:31 [DEBUG] train episode 2096: reward = 200.00, steps = 200\n",
      "00:03:32 [DEBUG] train episode 2097: reward = 150.00, steps = 150\n",
      "00:03:32 [DEBUG] train episode 2098: reward = 200.00, steps = 200\n",
      "00:03:32 [DEBUG] train episode 2099: reward = 157.00, steps = 157\n",
      "00:03:32 [DEBUG] train episode 2100: reward = 121.00, steps = 121\n",
      "00:03:32 [DEBUG] train episode 2101: reward = 96.00, steps = 96\n",
      "00:03:32 [DEBUG] train episode 2102: reward = 124.00, steps = 124\n",
      "00:03:32 [DEBUG] train episode 2103: reward = 180.00, steps = 180\n",
      "00:03:32 [DEBUG] train episode 2104: reward = 111.00, steps = 111\n",
      "00:03:32 [DEBUG] train episode 2105: reward = 108.00, steps = 108\n",
      "00:03:32 [DEBUG] train episode 2106: reward = 113.00, steps = 113\n",
      "00:03:32 [DEBUG] train episode 2107: reward = 107.00, steps = 107\n",
      "00:03:32 [DEBUG] train episode 2108: reward = 104.00, steps = 104\n",
      "00:03:32 [DEBUG] train episode 2109: reward = 121.00, steps = 121\n",
      "00:03:32 [DEBUG] train episode 2110: reward = 169.00, steps = 169\n",
      "00:03:32 [DEBUG] train episode 2111: reward = 114.00, steps = 114\n",
      "00:03:32 [DEBUG] train episode 2112: reward = 116.00, steps = 116\n",
      "00:03:32 [DEBUG] train episode 2113: reward = 100.00, steps = 100\n",
      "00:03:32 [DEBUG] train episode 2114: reward = 107.00, steps = 107\n",
      "00:03:32 [DEBUG] train episode 2115: reward = 109.00, steps = 109\n",
      "00:03:32 [DEBUG] train episode 2116: reward = 170.00, steps = 170\n",
      "00:03:32 [DEBUG] train episode 2117: reward = 113.00, steps = 113\n",
      "00:03:32 [DEBUG] train episode 2118: reward = 109.00, steps = 109\n",
      "00:03:32 [DEBUG] train episode 2119: reward = 200.00, steps = 200\n",
      "00:03:32 [DEBUG] train episode 2120: reward = 113.00, steps = 113\n",
      "00:03:32 [DEBUG] train episode 2121: reward = 144.00, steps = 144\n",
      "00:03:33 [DEBUG] train episode 2122: reward = 165.00, steps = 165\n",
      "00:03:33 [DEBUG] train episode 2123: reward = 87.00, steps = 87\n",
      "00:03:33 [DEBUG] train episode 2124: reward = 176.00, steps = 176\n",
      "00:03:33 [DEBUG] train episode 2125: reward = 200.00, steps = 200\n",
      "00:03:33 [DEBUG] train episode 2126: reward = 99.00, steps = 99\n",
      "00:03:33 [DEBUG] train episode 2127: reward = 125.00, steps = 125\n",
      "00:03:33 [DEBUG] train episode 2128: reward = 103.00, steps = 103\n",
      "00:03:33 [DEBUG] train episode 2129: reward = 89.00, steps = 89\n",
      "00:03:33 [DEBUG] train episode 2130: reward = 164.00, steps = 164\n",
      "00:03:33 [DEBUG] train episode 2131: reward = 200.00, steps = 200\n",
      "00:03:33 [DEBUG] train episode 2132: reward = 104.00, steps = 104\n",
      "00:03:33 [DEBUG] train episode 2133: reward = 119.00, steps = 119\n",
      "00:03:33 [DEBUG] train episode 2134: reward = 200.00, steps = 200\n",
      "00:03:33 [DEBUG] train episode 2135: reward = 191.00, steps = 191\n",
      "00:03:33 [DEBUG] train episode 2136: reward = 200.00, steps = 200\n",
      "00:03:33 [DEBUG] train episode 2137: reward = 124.00, steps = 124\n",
      "00:03:33 [DEBUG] train episode 2138: reward = 200.00, steps = 200\n",
      "00:03:33 [DEBUG] train episode 2139: reward = 106.00, steps = 106\n",
      "00:03:33 [DEBUG] train episode 2140: reward = 107.00, steps = 107\n",
      "00:03:33 [DEBUG] train episode 2141: reward = 148.00, steps = 148\n",
      "00:03:33 [DEBUG] train episode 2142: reward = 200.00, steps = 200\n",
      "00:03:33 [DEBUG] train episode 2143: reward = 128.00, steps = 128\n",
      "00:03:34 [DEBUG] train episode 2144: reward = 200.00, steps = 200\n",
      "00:03:34 [DEBUG] train episode 2145: reward = 200.00, steps = 200\n",
      "00:03:34 [DEBUG] train episode 2146: reward = 123.00, steps = 123\n",
      "00:03:34 [DEBUG] train episode 2147: reward = 161.00, steps = 161\n",
      "00:03:34 [DEBUG] train episode 2148: reward = 83.00, steps = 83\n",
      "00:03:34 [DEBUG] train episode 2149: reward = 102.00, steps = 102\n",
      "00:03:34 [DEBUG] train episode 2150: reward = 127.00, steps = 127\n",
      "00:03:34 [DEBUG] train episode 2151: reward = 200.00, steps = 200\n",
      "00:03:34 [DEBUG] train episode 2152: reward = 120.00, steps = 120\n",
      "00:03:34 [DEBUG] train episode 2153: reward = 113.00, steps = 113\n",
      "00:03:34 [DEBUG] train episode 2154: reward = 110.00, steps = 110\n",
      "00:03:34 [DEBUG] train episode 2155: reward = 200.00, steps = 200\n",
      "00:03:34 [DEBUG] train episode 2156: reward = 86.00, steps = 86\n",
      "00:03:34 [DEBUG] train episode 2157: reward = 137.00, steps = 137\n",
      "00:03:34 [DEBUG] train episode 2158: reward = 146.00, steps = 146\n",
      "00:03:34 [DEBUG] train episode 2159: reward = 120.00, steps = 120\n",
      "00:03:34 [DEBUG] train episode 2160: reward = 117.00, steps = 117\n",
      "00:03:34 [DEBUG] train episode 2161: reward = 168.00, steps = 168\n",
      "00:03:34 [DEBUG] train episode 2162: reward = 147.00, steps = 147\n",
      "00:03:34 [DEBUG] train episode 2163: reward = 135.00, steps = 135\n",
      "00:03:34 [DEBUG] train episode 2164: reward = 127.00, steps = 127\n",
      "00:03:34 [DEBUG] train episode 2165: reward = 127.00, steps = 127\n",
      "00:03:34 [DEBUG] train episode 2166: reward = 109.00, steps = 109\n",
      "00:03:35 [DEBUG] train episode 2167: reward = 122.00, steps = 122\n",
      "00:03:35 [DEBUG] train episode 2168: reward = 116.00, steps = 116\n",
      "00:03:35 [DEBUG] train episode 2169: reward = 115.00, steps = 115\n",
      "00:03:35 [DEBUG] train episode 2170: reward = 118.00, steps = 118\n",
      "00:03:35 [DEBUG] train episode 2171: reward = 124.00, steps = 124\n",
      "00:03:35 [DEBUG] train episode 2172: reward = 145.00, steps = 145\n",
      "00:03:35 [DEBUG] train episode 2173: reward = 129.00, steps = 129\n",
      "00:03:35 [DEBUG] train episode 2174: reward = 200.00, steps = 200\n",
      "00:03:35 [DEBUG] train episode 2175: reward = 141.00, steps = 141\n",
      "00:03:35 [DEBUG] train episode 2176: reward = 165.00, steps = 165\n",
      "00:03:35 [DEBUG] train episode 2177: reward = 132.00, steps = 132\n",
      "00:03:35 [DEBUG] train episode 2178: reward = 118.00, steps = 118\n",
      "00:03:35 [DEBUG] train episode 2179: reward = 106.00, steps = 106\n",
      "00:03:35 [DEBUG] train episode 2180: reward = 135.00, steps = 135\n",
      "00:03:35 [DEBUG] train episode 2181: reward = 129.00, steps = 129\n",
      "00:03:35 [DEBUG] train episode 2182: reward = 165.00, steps = 165\n",
      "00:03:35 [DEBUG] train episode 2183: reward = 121.00, steps = 121\n",
      "00:03:35 [DEBUG] train episode 2184: reward = 106.00, steps = 106\n",
      "00:03:35 [DEBUG] train episode 2185: reward = 99.00, steps = 99\n",
      "00:03:35 [DEBUG] train episode 2186: reward = 152.00, steps = 152\n",
      "00:03:35 [DEBUG] train episode 2187: reward = 152.00, steps = 152\n",
      "00:03:35 [DEBUG] train episode 2188: reward = 139.00, steps = 139\n",
      "00:03:35 [DEBUG] train episode 2189: reward = 191.00, steps = 191\n",
      "00:03:35 [DEBUG] train episode 2190: reward = 119.00, steps = 119\n",
      "00:03:36 [DEBUG] train episode 2191: reward = 200.00, steps = 200\n",
      "00:03:36 [DEBUG] train episode 2192: reward = 189.00, steps = 189\n",
      "00:03:36 [DEBUG] train episode 2193: reward = 105.00, steps = 105\n",
      "00:03:36 [DEBUG] train episode 2194: reward = 126.00, steps = 126\n",
      "00:03:36 [DEBUG] train episode 2195: reward = 190.00, steps = 190\n",
      "00:03:36 [DEBUG] train episode 2196: reward = 146.00, steps = 146\n",
      "00:03:36 [DEBUG] train episode 2197: reward = 136.00, steps = 136\n",
      "00:03:36 [DEBUG] train episode 2198: reward = 161.00, steps = 161\n",
      "00:03:36 [DEBUG] train episode 2199: reward = 132.00, steps = 132\n",
      "00:03:36 [DEBUG] train episode 2200: reward = 127.00, steps = 127\n",
      "00:03:36 [DEBUG] train episode 2201: reward = 128.00, steps = 128\n",
      "00:03:36 [DEBUG] train episode 2202: reward = 137.00, steps = 137\n",
      "00:03:36 [DEBUG] train episode 2203: reward = 167.00, steps = 167\n",
      "00:03:36 [DEBUG] train episode 2204: reward = 133.00, steps = 133\n",
      "00:03:36 [DEBUG] train episode 2205: reward = 193.00, steps = 193\n",
      "00:03:36 [DEBUG] train episode 2206: reward = 92.00, steps = 92\n",
      "00:03:36 [DEBUG] train episode 2207: reward = 193.00, steps = 193\n",
      "00:03:36 [DEBUG] train episode 2208: reward = 115.00, steps = 115\n",
      "00:03:36 [DEBUG] train episode 2209: reward = 126.00, steps = 126\n",
      "00:03:36 [DEBUG] train episode 2210: reward = 117.00, steps = 117\n",
      "00:03:36 [DEBUG] train episode 2211: reward = 103.00, steps = 103\n",
      "00:03:36 [DEBUG] train episode 2212: reward = 115.00, steps = 115\n",
      "00:03:36 [DEBUG] train episode 2213: reward = 133.00, steps = 133\n",
      "00:03:36 [DEBUG] train episode 2214: reward = 83.00, steps = 83\n",
      "00:03:37 [DEBUG] train episode 2215: reward = 96.00, steps = 96\n",
      "00:03:37 [DEBUG] train episode 2216: reward = 200.00, steps = 200\n",
      "00:03:37 [DEBUG] train episode 2217: reward = 102.00, steps = 102\n",
      "00:03:37 [DEBUG] train episode 2218: reward = 167.00, steps = 167\n",
      "00:03:37 [DEBUG] train episode 2219: reward = 159.00, steps = 159\n",
      "00:03:37 [DEBUG] train episode 2220: reward = 106.00, steps = 106\n",
      "00:03:37 [DEBUG] train episode 2221: reward = 200.00, steps = 200\n",
      "00:03:37 [DEBUG] train episode 2222: reward = 144.00, steps = 144\n",
      "00:03:37 [DEBUG] train episode 2223: reward = 102.00, steps = 102\n",
      "00:03:37 [DEBUG] train episode 2224: reward = 125.00, steps = 125\n",
      "00:03:37 [DEBUG] train episode 2225: reward = 137.00, steps = 137\n",
      "00:03:37 [DEBUG] train episode 2226: reward = 131.00, steps = 131\n",
      "00:03:37 [DEBUG] train episode 2227: reward = 200.00, steps = 200\n",
      "00:03:37 [DEBUG] train episode 2228: reward = 131.00, steps = 131\n",
      "00:03:37 [DEBUG] train episode 2229: reward = 129.00, steps = 129\n",
      "00:03:37 [DEBUG] train episode 2230: reward = 200.00, steps = 200\n",
      "00:03:37 [DEBUG] train episode 2231: reward = 130.00, steps = 130\n",
      "00:03:37 [DEBUG] train episode 2232: reward = 115.00, steps = 115\n",
      "00:03:37 [DEBUG] train episode 2233: reward = 133.00, steps = 133\n",
      "00:03:37 [DEBUG] train episode 2234: reward = 168.00, steps = 168\n",
      "00:03:37 [DEBUG] train episode 2235: reward = 129.00, steps = 129\n",
      "00:03:37 [DEBUG] train episode 2236: reward = 168.00, steps = 168\n",
      "00:03:38 [DEBUG] train episode 2237: reward = 118.00, steps = 118\n",
      "00:03:38 [DEBUG] train episode 2238: reward = 135.00, steps = 135\n",
      "00:03:38 [DEBUG] train episode 2239: reward = 173.00, steps = 173\n",
      "00:03:38 [DEBUG] train episode 2240: reward = 171.00, steps = 171\n",
      "00:03:38 [DEBUG] train episode 2241: reward = 97.00, steps = 97\n",
      "00:03:38 [DEBUG] train episode 2242: reward = 127.00, steps = 127\n",
      "00:03:38 [DEBUG] train episode 2243: reward = 91.00, steps = 91\n",
      "00:03:38 [DEBUG] train episode 2244: reward = 124.00, steps = 124\n",
      "00:03:38 [DEBUG] train episode 2245: reward = 97.00, steps = 97\n",
      "00:03:38 [DEBUG] train episode 2246: reward = 78.00, steps = 78\n",
      "00:03:38 [DEBUG] train episode 2247: reward = 123.00, steps = 123\n",
      "00:03:38 [DEBUG] train episode 2248: reward = 191.00, steps = 191\n",
      "00:03:38 [DEBUG] train episode 2249: reward = 114.00, steps = 114\n",
      "00:03:38 [DEBUG] train episode 2250: reward = 156.00, steps = 156\n",
      "00:03:38 [DEBUG] train episode 2251: reward = 115.00, steps = 115\n",
      "00:03:38 [DEBUG] train episode 2252: reward = 200.00, steps = 200\n",
      "00:03:38 [DEBUG] train episode 2253: reward = 128.00, steps = 128\n",
      "00:03:38 [DEBUG] train episode 2254: reward = 111.00, steps = 111\n",
      "00:03:38 [DEBUG] train episode 2255: reward = 139.00, steps = 139\n",
      "00:03:38 [DEBUG] train episode 2256: reward = 101.00, steps = 101\n",
      "00:03:38 [DEBUG] train episode 2257: reward = 142.00, steps = 142\n",
      "00:03:38 [DEBUG] train episode 2258: reward = 100.00, steps = 100\n",
      "00:03:38 [DEBUG] train episode 2259: reward = 122.00, steps = 122\n",
      "00:03:38 [DEBUG] train episode 2260: reward = 131.00, steps = 131\n",
      "00:03:38 [DEBUG] train episode 2261: reward = 146.00, steps = 146\n",
      "00:03:38 [DEBUG] train episode 2262: reward = 150.00, steps = 150\n",
      "00:03:39 [DEBUG] train episode 2263: reward = 199.00, steps = 199\n",
      "00:03:39 [DEBUG] train episode 2264: reward = 132.00, steps = 132\n",
      "00:03:39 [DEBUG] train episode 2265: reward = 117.00, steps = 117\n",
      "00:03:39 [DEBUG] train episode 2266: reward = 179.00, steps = 179\n",
      "00:03:39 [DEBUG] train episode 2267: reward = 120.00, steps = 120\n",
      "00:03:39 [DEBUG] train episode 2268: reward = 112.00, steps = 112\n",
      "00:03:39 [DEBUG] train episode 2269: reward = 120.00, steps = 120\n",
      "00:03:39 [DEBUG] train episode 2270: reward = 100.00, steps = 100\n",
      "00:03:39 [DEBUG] train episode 2271: reward = 119.00, steps = 119\n",
      "00:03:39 [DEBUG] train episode 2272: reward = 81.00, steps = 81\n",
      "00:03:39 [DEBUG] train episode 2273: reward = 118.00, steps = 118\n",
      "00:03:39 [DEBUG] train episode 2274: reward = 179.00, steps = 179\n",
      "00:03:39 [DEBUG] train episode 2275: reward = 200.00, steps = 200\n",
      "00:03:39 [DEBUG] train episode 2276: reward = 193.00, steps = 193\n",
      "00:03:39 [DEBUG] train episode 2277: reward = 79.00, steps = 79\n",
      "00:03:39 [DEBUG] train episode 2278: reward = 115.00, steps = 115\n",
      "00:03:39 [DEBUG] train episode 2279: reward = 111.00, steps = 111\n",
      "00:03:39 [DEBUG] train episode 2280: reward = 144.00, steps = 144\n",
      "00:03:39 [DEBUG] train episode 2281: reward = 137.00, steps = 137\n",
      "00:03:39 [DEBUG] train episode 2282: reward = 131.00, steps = 131\n",
      "00:03:39 [DEBUG] train episode 2283: reward = 103.00, steps = 103\n",
      "00:03:39 [DEBUG] train episode 2284: reward = 155.00, steps = 155\n",
      "00:03:39 [DEBUG] train episode 2285: reward = 93.00, steps = 93\n",
      "00:03:39 [DEBUG] train episode 2286: reward = 149.00, steps = 149\n",
      "00:03:39 [DEBUG] train episode 2287: reward = 97.00, steps = 97\n",
      "00:03:40 [DEBUG] train episode 2288: reward = 107.00, steps = 107\n",
      "00:03:40 [DEBUG] train episode 2289: reward = 126.00, steps = 126\n",
      "00:03:40 [DEBUG] train episode 2290: reward = 108.00, steps = 108\n",
      "00:03:40 [DEBUG] train episode 2291: reward = 136.00, steps = 136\n",
      "00:03:40 [DEBUG] train episode 2292: reward = 132.00, steps = 132\n",
      "00:03:40 [DEBUG] train episode 2293: reward = 110.00, steps = 110\n",
      "00:03:40 [DEBUG] train episode 2294: reward = 146.00, steps = 146\n",
      "00:03:40 [DEBUG] train episode 2295: reward = 200.00, steps = 200\n",
      "00:03:40 [DEBUG] train episode 2296: reward = 140.00, steps = 140\n",
      "00:03:40 [DEBUG] train episode 2297: reward = 109.00, steps = 109\n",
      "00:03:40 [DEBUG] train episode 2298: reward = 96.00, steps = 96\n",
      "00:03:40 [DEBUG] train episode 2299: reward = 173.00, steps = 173\n",
      "00:03:40 [DEBUG] train episode 2300: reward = 125.00, steps = 125\n",
      "00:03:40 [DEBUG] train episode 2301: reward = 127.00, steps = 127\n",
      "00:03:40 [DEBUG] train episode 2302: reward = 200.00, steps = 200\n",
      "00:03:40 [DEBUG] train episode 2303: reward = 173.00, steps = 173\n",
      "00:03:40 [DEBUG] train episode 2304: reward = 131.00, steps = 131\n",
      "00:03:40 [DEBUG] train episode 2305: reward = 200.00, steps = 200\n",
      "00:03:40 [DEBUG] train episode 2306: reward = 174.00, steps = 174\n",
      "00:03:40 [DEBUG] train episode 2307: reward = 86.00, steps = 86\n",
      "00:03:40 [DEBUG] train episode 2308: reward = 114.00, steps = 114\n",
      "00:03:40 [DEBUG] train episode 2309: reward = 131.00, steps = 131\n",
      "00:03:40 [DEBUG] train episode 2310: reward = 127.00, steps = 127\n",
      "00:03:40 [DEBUG] train episode 2311: reward = 95.00, steps = 95\n",
      "00:03:41 [DEBUG] train episode 2312: reward = 110.00, steps = 110\n",
      "00:03:41 [DEBUG] train episode 2313: reward = 89.00, steps = 89\n",
      "00:03:41 [DEBUG] train episode 2314: reward = 126.00, steps = 126\n",
      "00:03:41 [DEBUG] train episode 2315: reward = 112.00, steps = 112\n",
      "00:03:41 [DEBUG] train episode 2316: reward = 93.00, steps = 93\n",
      "00:03:41 [DEBUG] train episode 2317: reward = 126.00, steps = 126\n",
      "00:03:41 [DEBUG] train episode 2318: reward = 98.00, steps = 98\n",
      "00:03:41 [DEBUG] train episode 2319: reward = 200.00, steps = 200\n",
      "00:03:41 [DEBUG] train episode 2320: reward = 200.00, steps = 200\n",
      "00:03:41 [DEBUG] train episode 2321: reward = 101.00, steps = 101\n",
      "00:03:41 [DEBUG] train episode 2322: reward = 150.00, steps = 150\n",
      "00:03:41 [DEBUG] train episode 2323: reward = 106.00, steps = 106\n",
      "00:03:41 [DEBUG] train episode 2324: reward = 105.00, steps = 105\n",
      "00:03:41 [DEBUG] train episode 2325: reward = 121.00, steps = 121\n",
      "00:03:41 [DEBUG] train episode 2326: reward = 120.00, steps = 120\n",
      "00:03:41 [DEBUG] train episode 2327: reward = 110.00, steps = 110\n",
      "00:03:41 [DEBUG] train episode 2328: reward = 100.00, steps = 100\n",
      "00:03:41 [DEBUG] train episode 2329: reward = 200.00, steps = 200\n",
      "00:03:41 [DEBUG] train episode 2330: reward = 106.00, steps = 106\n",
      "00:03:41 [DEBUG] train episode 2331: reward = 130.00, steps = 130\n",
      "00:03:41 [DEBUG] train episode 2332: reward = 130.00, steps = 130\n",
      "00:03:41 [DEBUG] train episode 2333: reward = 200.00, steps = 200\n",
      "00:03:41 [DEBUG] train episode 2334: reward = 140.00, steps = 140\n",
      "00:03:41 [DEBUG] train episode 2335: reward = 189.00, steps = 189\n",
      "00:03:42 [DEBUG] train episode 2336: reward = 167.00, steps = 167\n",
      "00:03:42 [DEBUG] train episode 2337: reward = 132.00, steps = 132\n",
      "00:03:42 [DEBUG] train episode 2338: reward = 84.00, steps = 84\n",
      "00:03:42 [DEBUG] train episode 2339: reward = 109.00, steps = 109\n",
      "00:03:42 [DEBUG] train episode 2340: reward = 115.00, steps = 115\n",
      "00:03:42 [DEBUG] train episode 2341: reward = 136.00, steps = 136\n",
      "00:03:42 [DEBUG] train episode 2342: reward = 188.00, steps = 188\n",
      "00:03:42 [DEBUG] train episode 2343: reward = 138.00, steps = 138\n",
      "00:03:42 [DEBUG] train episode 2344: reward = 100.00, steps = 100\n",
      "00:03:42 [DEBUG] train episode 2345: reward = 169.00, steps = 169\n",
      "00:03:42 [DEBUG] train episode 2346: reward = 168.00, steps = 168\n",
      "00:03:42 [DEBUG] train episode 2347: reward = 183.00, steps = 183\n",
      "00:03:42 [DEBUG] train episode 2348: reward = 97.00, steps = 97\n",
      "00:03:42 [DEBUG] train episode 2349: reward = 86.00, steps = 86\n",
      "00:03:42 [DEBUG] train episode 2350: reward = 98.00, steps = 98\n",
      "00:03:42 [DEBUG] train episode 2351: reward = 112.00, steps = 112\n",
      "00:03:42 [DEBUG] train episode 2352: reward = 117.00, steps = 117\n",
      "00:03:42 [DEBUG] train episode 2353: reward = 168.00, steps = 168\n",
      "00:03:42 [DEBUG] train episode 2354: reward = 122.00, steps = 122\n",
      "00:03:42 [DEBUG] train episode 2355: reward = 130.00, steps = 130\n",
      "00:03:42 [DEBUG] train episode 2356: reward = 186.00, steps = 186\n",
      "00:03:42 [DEBUG] train episode 2357: reward = 200.00, steps = 200\n",
      "00:03:42 [DEBUG] train episode 2358: reward = 122.00, steps = 122\n",
      "00:03:42 [DEBUG] train episode 2359: reward = 161.00, steps = 161\n",
      "00:03:43 [DEBUG] train episode 2360: reward = 98.00, steps = 98\n",
      "00:03:43 [DEBUG] train episode 2361: reward = 100.00, steps = 100\n",
      "00:03:43 [DEBUG] train episode 2362: reward = 168.00, steps = 168\n",
      "00:03:43 [DEBUG] train episode 2363: reward = 101.00, steps = 101\n",
      "00:03:43 [DEBUG] train episode 2364: reward = 112.00, steps = 112\n",
      "00:03:43 [DEBUG] train episode 2365: reward = 162.00, steps = 162\n",
      "00:03:43 [DEBUG] train episode 2366: reward = 121.00, steps = 121\n",
      "00:03:43 [DEBUG] train episode 2367: reward = 88.00, steps = 88\n",
      "00:03:43 [DEBUG] train episode 2368: reward = 153.00, steps = 153\n",
      "00:03:43 [DEBUG] train episode 2369: reward = 84.00, steps = 84\n",
      "00:03:43 [DEBUG] train episode 2370: reward = 90.00, steps = 90\n",
      "00:03:43 [DEBUG] train episode 2371: reward = 135.00, steps = 135\n",
      "00:03:43 [DEBUG] train episode 2372: reward = 107.00, steps = 107\n",
      "00:03:43 [DEBUG] train episode 2373: reward = 110.00, steps = 110\n",
      "00:03:43 [DEBUG] train episode 2374: reward = 123.00, steps = 123\n",
      "00:03:43 [DEBUG] train episode 2375: reward = 171.00, steps = 171\n",
      "00:03:43 [DEBUG] train episode 2376: reward = 115.00, steps = 115\n",
      "00:03:43 [DEBUG] train episode 2377: reward = 116.00, steps = 116\n",
      "00:03:43 [DEBUG] train episode 2378: reward = 99.00, steps = 99\n",
      "00:03:43 [DEBUG] train episode 2379: reward = 100.00, steps = 100\n",
      "00:03:43 [DEBUG] train episode 2380: reward = 135.00, steps = 135\n",
      "00:03:43 [DEBUG] train episode 2381: reward = 117.00, steps = 117\n",
      "00:03:43 [DEBUG] train episode 2382: reward = 101.00, steps = 101\n",
      "00:03:43 [DEBUG] train episode 2383: reward = 131.00, steps = 131\n",
      "00:03:43 [DEBUG] train episode 2384: reward = 109.00, steps = 109\n",
      "00:03:43 [DEBUG] train episode 2385: reward = 197.00, steps = 197\n",
      "00:03:44 [DEBUG] train episode 2386: reward = 115.00, steps = 115\n",
      "00:03:44 [DEBUG] train episode 2387: reward = 162.00, steps = 162\n",
      "00:03:44 [DEBUG] train episode 2388: reward = 104.00, steps = 104\n",
      "00:03:44 [DEBUG] train episode 2389: reward = 135.00, steps = 135\n",
      "00:03:44 [DEBUG] train episode 2390: reward = 125.00, steps = 125\n",
      "00:03:44 [DEBUG] train episode 2391: reward = 101.00, steps = 101\n",
      "00:03:44 [DEBUG] train episode 2392: reward = 98.00, steps = 98\n",
      "00:03:44 [DEBUG] train episode 2393: reward = 101.00, steps = 101\n",
      "00:03:44 [DEBUG] train episode 2394: reward = 105.00, steps = 105\n",
      "00:03:44 [DEBUG] train episode 2395: reward = 107.00, steps = 107\n",
      "00:03:44 [DEBUG] train episode 2396: reward = 172.00, steps = 172\n",
      "00:03:44 [DEBUG] train episode 2397: reward = 133.00, steps = 133\n",
      "00:03:44 [DEBUG] train episode 2398: reward = 200.00, steps = 200\n",
      "00:03:44 [DEBUG] train episode 2399: reward = 197.00, steps = 197\n",
      "00:03:44 [DEBUG] train episode 2400: reward = 129.00, steps = 129\n",
      "00:03:44 [DEBUG] train episode 2401: reward = 116.00, steps = 116\n",
      "00:03:44 [DEBUG] train episode 2402: reward = 143.00, steps = 143\n",
      "00:03:44 [DEBUG] train episode 2403: reward = 200.00, steps = 200\n",
      "00:03:44 [DEBUG] train episode 2404: reward = 127.00, steps = 127\n",
      "00:03:44 [DEBUG] train episode 2405: reward = 109.00, steps = 109\n",
      "00:03:44 [DEBUG] train episode 2406: reward = 125.00, steps = 125\n",
      "00:03:44 [DEBUG] train episode 2407: reward = 200.00, steps = 200\n",
      "00:03:44 [DEBUG] train episode 2408: reward = 99.00, steps = 99\n",
      "00:03:44 [DEBUG] train episode 2409: reward = 177.00, steps = 177\n",
      "00:03:45 [DEBUG] train episode 2410: reward = 133.00, steps = 133\n",
      "00:03:45 [DEBUG] train episode 2411: reward = 134.00, steps = 134\n",
      "00:03:45 [DEBUG] train episode 2412: reward = 137.00, steps = 137\n",
      "00:03:45 [DEBUG] train episode 2413: reward = 142.00, steps = 142\n",
      "00:03:45 [DEBUG] train episode 2414: reward = 96.00, steps = 96\n",
      "00:03:45 [DEBUG] train episode 2415: reward = 137.00, steps = 137\n",
      "00:03:45 [DEBUG] train episode 2416: reward = 113.00, steps = 113\n",
      "00:03:45 [DEBUG] train episode 2417: reward = 118.00, steps = 118\n",
      "00:03:45 [DEBUG] train episode 2418: reward = 200.00, steps = 200\n",
      "00:03:45 [DEBUG] train episode 2419: reward = 141.00, steps = 141\n",
      "00:03:45 [DEBUG] train episode 2420: reward = 91.00, steps = 91\n",
      "00:03:45 [DEBUG] train episode 2421: reward = 118.00, steps = 118\n",
      "00:03:45 [DEBUG] train episode 2422: reward = 144.00, steps = 144\n",
      "00:03:45 [DEBUG] train episode 2423: reward = 108.00, steps = 108\n",
      "00:03:45 [DEBUG] train episode 2424: reward = 98.00, steps = 98\n",
      "00:03:45 [DEBUG] train episode 2425: reward = 150.00, steps = 150\n",
      "00:03:45 [DEBUG] train episode 2426: reward = 157.00, steps = 157\n",
      "00:03:45 [DEBUG] train episode 2427: reward = 133.00, steps = 133\n",
      "00:03:45 [DEBUG] train episode 2428: reward = 160.00, steps = 160\n",
      "00:03:45 [DEBUG] train episode 2429: reward = 200.00, steps = 200\n",
      "00:03:45 [DEBUG] train episode 2430: reward = 153.00, steps = 153\n",
      "00:03:45 [DEBUG] train episode 2431: reward = 112.00, steps = 112\n",
      "00:03:45 [DEBUG] train episode 2432: reward = 129.00, steps = 129\n",
      "00:03:46 [DEBUG] train episode 2433: reward = 86.00, steps = 86\n",
      "00:03:46 [DEBUG] train episode 2434: reward = 102.00, steps = 102\n",
      "00:03:46 [DEBUG] train episode 2435: reward = 139.00, steps = 139\n",
      "00:03:46 [DEBUG] train episode 2436: reward = 130.00, steps = 130\n",
      "00:03:46 [DEBUG] train episode 2437: reward = 135.00, steps = 135\n",
      "00:03:46 [DEBUG] train episode 2438: reward = 100.00, steps = 100\n",
      "00:03:46 [DEBUG] train episode 2439: reward = 130.00, steps = 130\n",
      "00:03:46 [DEBUG] train episode 2440: reward = 93.00, steps = 93\n",
      "00:03:46 [DEBUG] train episode 2441: reward = 151.00, steps = 151\n",
      "00:03:46 [DEBUG] train episode 2442: reward = 106.00, steps = 106\n",
      "00:03:46 [DEBUG] train episode 2443: reward = 102.00, steps = 102\n",
      "00:03:46 [DEBUG] train episode 2444: reward = 151.00, steps = 151\n",
      "00:03:46 [DEBUG] train episode 2445: reward = 115.00, steps = 115\n",
      "00:03:46 [DEBUG] train episode 2446: reward = 107.00, steps = 107\n",
      "00:03:46 [DEBUG] train episode 2447: reward = 200.00, steps = 200\n",
      "00:03:46 [DEBUG] train episode 2448: reward = 120.00, steps = 120\n",
      "00:03:46 [DEBUG] train episode 2449: reward = 138.00, steps = 138\n",
      "00:03:46 [DEBUG] train episode 2450: reward = 127.00, steps = 127\n",
      "00:03:46 [DEBUG] train episode 2451: reward = 121.00, steps = 121\n",
      "00:03:46 [DEBUG] train episode 2452: reward = 137.00, steps = 137\n",
      "00:03:46 [DEBUG] train episode 2453: reward = 148.00, steps = 148\n",
      "00:03:46 [DEBUG] train episode 2454: reward = 151.00, steps = 151\n",
      "00:03:46 [DEBUG] train episode 2455: reward = 136.00, steps = 136\n",
      "00:03:46 [DEBUG] train episode 2456: reward = 100.00, steps = 100\n",
      "00:03:46 [DEBUG] train episode 2457: reward = 162.00, steps = 162\n",
      "00:03:47 [DEBUG] train episode 2458: reward = 132.00, steps = 132\n",
      "00:03:47 [DEBUG] train episode 2459: reward = 101.00, steps = 101\n",
      "00:03:47 [DEBUG] train episode 2460: reward = 98.00, steps = 98\n",
      "00:03:47 [DEBUG] train episode 2461: reward = 147.00, steps = 147\n",
      "00:03:47 [DEBUG] train episode 2462: reward = 141.00, steps = 141\n",
      "00:03:47 [DEBUG] train episode 2463: reward = 115.00, steps = 115\n",
      "00:03:47 [DEBUG] train episode 2464: reward = 136.00, steps = 136\n",
      "00:03:47 [DEBUG] train episode 2465: reward = 200.00, steps = 200\n",
      "00:03:47 [DEBUG] train episode 2466: reward = 83.00, steps = 83\n",
      "00:03:47 [DEBUG] train episode 2467: reward = 158.00, steps = 158\n",
      "00:03:47 [DEBUG] train episode 2468: reward = 200.00, steps = 200\n",
      "00:03:47 [DEBUG] train episode 2469: reward = 122.00, steps = 122\n",
      "00:03:47 [DEBUG] train episode 2470: reward = 128.00, steps = 128\n",
      "00:03:47 [DEBUG] train episode 2471: reward = 88.00, steps = 88\n",
      "00:03:47 [DEBUG] train episode 2472: reward = 106.00, steps = 106\n",
      "00:03:47 [DEBUG] train episode 2473: reward = 200.00, steps = 200\n",
      "00:03:47 [DEBUG] train episode 2474: reward = 128.00, steps = 128\n",
      "00:03:47 [DEBUG] train episode 2475: reward = 151.00, steps = 151\n",
      "00:03:47 [DEBUG] train episode 2476: reward = 152.00, steps = 152\n",
      "00:03:47 [DEBUG] train episode 2477: reward = 178.00, steps = 178\n",
      "00:03:47 [DEBUG] train episode 2478: reward = 153.00, steps = 153\n",
      "00:03:47 [DEBUG] train episode 2479: reward = 150.00, steps = 150\n",
      "00:03:47 [DEBUG] train episode 2480: reward = 112.00, steps = 112\n",
      "00:03:47 [DEBUG] train episode 2481: reward = 93.00, steps = 93\n",
      "00:03:48 [DEBUG] train episode 2482: reward = 151.00, steps = 151\n",
      "00:03:48 [DEBUG] train episode 2483: reward = 109.00, steps = 109\n",
      "00:03:48 [DEBUG] train episode 2484: reward = 113.00, steps = 113\n",
      "00:03:48 [DEBUG] train episode 2485: reward = 199.00, steps = 199\n",
      "00:03:48 [DEBUG] train episode 2486: reward = 156.00, steps = 156\n",
      "00:03:48 [DEBUG] train episode 2487: reward = 119.00, steps = 119\n",
      "00:03:48 [DEBUG] train episode 2488: reward = 135.00, steps = 135\n",
      "00:03:48 [DEBUG] train episode 2489: reward = 118.00, steps = 118\n",
      "00:03:48 [DEBUG] train episode 2490: reward = 131.00, steps = 131\n",
      "00:03:48 [DEBUG] train episode 2491: reward = 96.00, steps = 96\n",
      "00:03:48 [DEBUG] train episode 2492: reward = 131.00, steps = 131\n",
      "00:03:48 [DEBUG] train episode 2493: reward = 164.00, steps = 164\n",
      "00:03:48 [DEBUG] train episode 2494: reward = 119.00, steps = 119\n",
      "00:03:48 [DEBUG] train episode 2495: reward = 112.00, steps = 112\n",
      "00:03:48 [DEBUG] train episode 2496: reward = 76.00, steps = 76\n",
      "00:03:48 [DEBUG] train episode 2497: reward = 105.00, steps = 105\n",
      "00:03:48 [DEBUG] train episode 2498: reward = 105.00, steps = 105\n",
      "00:03:48 [DEBUG] train episode 2499: reward = 123.00, steps = 123\n",
      "00:03:48 [DEBUG] train episode 2500: reward = 200.00, steps = 200\n",
      "00:03:48 [DEBUG] train episode 2501: reward = 128.00, steps = 128\n",
      "00:03:48 [DEBUG] train episode 2502: reward = 187.00, steps = 187\n",
      "00:03:48 [DEBUG] train episode 2503: reward = 200.00, steps = 200\n",
      "00:03:48 [DEBUG] train episode 2504: reward = 98.00, steps = 98\n",
      "00:03:48 [DEBUG] train episode 2505: reward = 131.00, steps = 131\n",
      "00:03:48 [DEBUG] train episode 2506: reward = 140.00, steps = 140\n",
      "00:03:49 [DEBUG] train episode 2507: reward = 110.00, steps = 110\n",
      "00:03:49 [DEBUG] train episode 2508: reward = 156.00, steps = 156\n",
      "00:03:49 [DEBUG] train episode 2509: reward = 92.00, steps = 92\n",
      "00:03:49 [DEBUG] train episode 2510: reward = 200.00, steps = 200\n",
      "00:03:49 [DEBUG] train episode 2511: reward = 121.00, steps = 121\n",
      "00:03:49 [DEBUG] train episode 2512: reward = 126.00, steps = 126\n",
      "00:03:49 [DEBUG] train episode 2513: reward = 155.00, steps = 155\n",
      "00:03:49 [DEBUG] train episode 2514: reward = 188.00, steps = 188\n",
      "00:03:49 [DEBUG] train episode 2515: reward = 118.00, steps = 118\n",
      "00:03:49 [DEBUG] train episode 2516: reward = 109.00, steps = 109\n",
      "00:03:49 [DEBUG] train episode 2517: reward = 147.00, steps = 147\n",
      "00:03:49 [DEBUG] train episode 2518: reward = 164.00, steps = 164\n",
      "00:03:49 [DEBUG] train episode 2519: reward = 115.00, steps = 115\n",
      "00:03:49 [DEBUG] train episode 2520: reward = 170.00, steps = 170\n",
      "00:03:49 [DEBUG] train episode 2521: reward = 100.00, steps = 100\n",
      "00:03:49 [DEBUG] train episode 2522: reward = 109.00, steps = 109\n",
      "00:03:49 [DEBUG] train episode 2523: reward = 192.00, steps = 192\n",
      "00:03:49 [DEBUG] train episode 2524: reward = 98.00, steps = 98\n",
      "00:03:49 [DEBUG] train episode 2525: reward = 167.00, steps = 167\n",
      "00:03:49 [DEBUG] train episode 2526: reward = 101.00, steps = 101\n",
      "00:03:49 [DEBUG] train episode 2527: reward = 178.00, steps = 178\n",
      "00:03:49 [DEBUG] train episode 2528: reward = 153.00, steps = 153\n",
      "00:03:49 [DEBUG] train episode 2529: reward = 178.00, steps = 178\n",
      "00:03:50 [DEBUG] train episode 2530: reward = 122.00, steps = 122\n",
      "00:03:50 [DEBUG] train episode 2531: reward = 161.00, steps = 161\n",
      "00:03:50 [DEBUG] train episode 2532: reward = 98.00, steps = 98\n",
      "00:03:50 [DEBUG] train episode 2533: reward = 142.00, steps = 142\n",
      "00:03:50 [DEBUG] train episode 2534: reward = 177.00, steps = 177\n",
      "00:03:50 [DEBUG] train episode 2535: reward = 147.00, steps = 147\n",
      "00:03:50 [DEBUG] train episode 2536: reward = 148.00, steps = 148\n",
      "00:03:50 [DEBUG] train episode 2537: reward = 141.00, steps = 141\n",
      "00:03:50 [DEBUG] train episode 2538: reward = 116.00, steps = 116\n",
      "00:03:50 [DEBUG] train episode 2539: reward = 155.00, steps = 155\n",
      "00:03:50 [DEBUG] train episode 2540: reward = 138.00, steps = 138\n",
      "00:03:50 [DEBUG] train episode 2541: reward = 193.00, steps = 193\n",
      "00:03:50 [DEBUG] train episode 2542: reward = 200.00, steps = 200\n",
      "00:03:50 [DEBUG] train episode 2543: reward = 153.00, steps = 153\n",
      "00:03:50 [DEBUG] train episode 2544: reward = 200.00, steps = 200\n",
      "00:03:50 [DEBUG] train episode 2545: reward = 161.00, steps = 161\n",
      "00:03:50 [DEBUG] train episode 2546: reward = 126.00, steps = 126\n",
      "00:03:50 [DEBUG] train episode 2547: reward = 200.00, steps = 200\n",
      "00:03:50 [DEBUG] train episode 2548: reward = 160.00, steps = 160\n",
      "00:03:50 [DEBUG] train episode 2549: reward = 200.00, steps = 200\n",
      "00:03:50 [DEBUG] train episode 2550: reward = 141.00, steps = 141\n",
      "00:03:51 [DEBUG] train episode 2551: reward = 124.00, steps = 124\n",
      "00:03:51 [DEBUG] train episode 2552: reward = 167.00, steps = 167\n",
      "00:03:51 [DEBUG] train episode 2553: reward = 118.00, steps = 118\n",
      "00:03:51 [DEBUG] train episode 2554: reward = 81.00, steps = 81\n",
      "00:03:51 [DEBUG] train episode 2555: reward = 111.00, steps = 111\n",
      "00:03:51 [DEBUG] train episode 2556: reward = 122.00, steps = 122\n",
      "00:03:51 [DEBUG] train episode 2557: reward = 98.00, steps = 98\n",
      "00:03:51 [DEBUG] train episode 2558: reward = 116.00, steps = 116\n",
      "00:03:51 [DEBUG] train episode 2559: reward = 119.00, steps = 119\n",
      "00:03:51 [DEBUG] train episode 2560: reward = 160.00, steps = 160\n",
      "00:03:51 [DEBUG] train episode 2561: reward = 143.00, steps = 143\n",
      "00:03:51 [DEBUG] train episode 2562: reward = 98.00, steps = 98\n",
      "00:03:51 [DEBUG] train episode 2563: reward = 130.00, steps = 130\n",
      "00:03:51 [DEBUG] train episode 2564: reward = 166.00, steps = 166\n",
      "00:03:51 [DEBUG] train episode 2565: reward = 155.00, steps = 155\n",
      "00:03:51 [DEBUG] train episode 2566: reward = 192.00, steps = 192\n",
      "00:03:51 [DEBUG] train episode 2567: reward = 200.00, steps = 200\n",
      "00:03:51 [DEBUG] train episode 2568: reward = 178.00, steps = 178\n",
      "00:03:51 [DEBUG] train episode 2569: reward = 138.00, steps = 138\n",
      "00:03:51 [DEBUG] train episode 2570: reward = 103.00, steps = 103\n",
      "00:03:51 [DEBUG] train episode 2571: reward = 200.00, steps = 200\n",
      "00:03:51 [DEBUG] train episode 2572: reward = 180.00, steps = 180\n",
      "00:03:51 [DEBUG] train episode 2573: reward = 105.00, steps = 105\n",
      "00:03:52 [DEBUG] train episode 2574: reward = 150.00, steps = 150\n",
      "00:03:52 [DEBUG] train episode 2575: reward = 200.00, steps = 200\n",
      "00:03:52 [DEBUG] train episode 2576: reward = 168.00, steps = 168\n",
      "00:03:52 [DEBUG] train episode 2577: reward = 200.00, steps = 200\n",
      "00:03:52 [DEBUG] train episode 2578: reward = 178.00, steps = 178\n",
      "00:03:52 [DEBUG] train episode 2579: reward = 142.00, steps = 142\n",
      "00:03:52 [DEBUG] train episode 2580: reward = 125.00, steps = 125\n",
      "00:03:52 [DEBUG] train episode 2581: reward = 100.00, steps = 100\n",
      "00:03:52 [DEBUG] train episode 2582: reward = 152.00, steps = 152\n",
      "00:03:52 [DEBUG] train episode 2583: reward = 145.00, steps = 145\n",
      "00:03:52 [DEBUG] train episode 2584: reward = 124.00, steps = 124\n",
      "00:03:52 [DEBUG] train episode 2585: reward = 200.00, steps = 200\n",
      "00:03:52 [DEBUG] train episode 2586: reward = 125.00, steps = 125\n",
      "00:03:52 [DEBUG] train episode 2587: reward = 137.00, steps = 137\n",
      "00:03:52 [DEBUG] train episode 2588: reward = 154.00, steps = 154\n",
      "00:03:52 [DEBUG] train episode 2589: reward = 119.00, steps = 119\n",
      "00:03:52 [DEBUG] train episode 2590: reward = 123.00, steps = 123\n",
      "00:03:52 [DEBUG] train episode 2591: reward = 192.00, steps = 192\n",
      "00:03:52 [DEBUG] train episode 2592: reward = 125.00, steps = 125\n",
      "00:03:52 [DEBUG] train episode 2593: reward = 93.00, steps = 93\n",
      "00:03:52 [DEBUG] train episode 2594: reward = 131.00, steps = 131\n",
      "00:03:52 [DEBUG] train episode 2595: reward = 91.00, steps = 91\n",
      "00:03:52 [DEBUG] train episode 2596: reward = 127.00, steps = 127\n",
      "00:03:53 [DEBUG] train episode 2597: reward = 159.00, steps = 159\n",
      "00:03:53 [DEBUG] train episode 2598: reward = 129.00, steps = 129\n",
      "00:03:53 [DEBUG] train episode 2599: reward = 142.00, steps = 142\n",
      "00:03:53 [DEBUG] train episode 2600: reward = 200.00, steps = 200\n",
      "00:03:53 [DEBUG] train episode 2601: reward = 154.00, steps = 154\n",
      "00:03:53 [DEBUG] train episode 2602: reward = 120.00, steps = 120\n",
      "00:03:53 [DEBUG] train episode 2603: reward = 141.00, steps = 141\n",
      "00:03:53 [DEBUG] train episode 2604: reward = 200.00, steps = 200\n",
      "00:03:53 [DEBUG] train episode 2605: reward = 116.00, steps = 116\n",
      "00:03:53 [DEBUG] train episode 2606: reward = 139.00, steps = 139\n",
      "00:03:53 [DEBUG] train episode 2607: reward = 113.00, steps = 113\n",
      "00:03:53 [DEBUG] train episode 2608: reward = 144.00, steps = 144\n",
      "00:03:53 [DEBUG] train episode 2609: reward = 190.00, steps = 190\n",
      "00:03:53 [DEBUG] train episode 2610: reward = 133.00, steps = 133\n",
      "00:03:53 [DEBUG] train episode 2611: reward = 97.00, steps = 97\n",
      "00:03:53 [DEBUG] train episode 2612: reward = 111.00, steps = 111\n",
      "00:03:53 [DEBUG] train episode 2613: reward = 176.00, steps = 176\n",
      "00:03:53 [DEBUG] train episode 2614: reward = 118.00, steps = 118\n",
      "00:03:53 [DEBUG] train episode 2615: reward = 103.00, steps = 103\n",
      "00:03:53 [DEBUG] train episode 2616: reward = 200.00, steps = 200\n",
      "00:03:53 [DEBUG] train episode 2617: reward = 153.00, steps = 153\n",
      "00:03:53 [DEBUG] train episode 2618: reward = 164.00, steps = 164\n",
      "00:03:53 [DEBUG] train episode 2619: reward = 196.00, steps = 196\n",
      "00:03:54 [DEBUG] train episode 2620: reward = 114.00, steps = 114\n",
      "00:03:54 [DEBUG] train episode 2621: reward = 112.00, steps = 112\n",
      "00:03:54 [DEBUG] train episode 2622: reward = 88.00, steps = 88\n",
      "00:03:54 [DEBUG] train episode 2623: reward = 137.00, steps = 137\n",
      "00:03:54 [DEBUG] train episode 2624: reward = 127.00, steps = 127\n",
      "00:03:54 [DEBUG] train episode 2625: reward = 131.00, steps = 131\n",
      "00:03:54 [DEBUG] train episode 2626: reward = 174.00, steps = 174\n",
      "00:03:54 [DEBUG] train episode 2627: reward = 112.00, steps = 112\n",
      "00:03:54 [DEBUG] train episode 2628: reward = 168.00, steps = 168\n",
      "00:03:54 [DEBUG] train episode 2629: reward = 103.00, steps = 103\n",
      "00:03:54 [DEBUG] train episode 2630: reward = 164.00, steps = 164\n",
      "00:03:54 [DEBUG] train episode 2631: reward = 122.00, steps = 122\n",
      "00:03:54 [DEBUG] train episode 2632: reward = 143.00, steps = 143\n",
      "00:03:54 [DEBUG] train episode 2633: reward = 143.00, steps = 143\n",
      "00:03:54 [DEBUG] train episode 2634: reward = 200.00, steps = 200\n",
      "00:03:54 [DEBUG] train episode 2635: reward = 129.00, steps = 129\n",
      "00:03:54 [DEBUG] train episode 2636: reward = 107.00, steps = 107\n",
      "00:03:54 [DEBUG] train episode 2637: reward = 169.00, steps = 169\n",
      "00:03:54 [DEBUG] train episode 2638: reward = 149.00, steps = 149\n",
      "00:03:54 [DEBUG] train episode 2639: reward = 99.00, steps = 99\n",
      "00:03:54 [DEBUG] train episode 2640: reward = 130.00, steps = 130\n",
      "00:03:54 [DEBUG] train episode 2641: reward = 138.00, steps = 138\n",
      "00:03:54 [DEBUG] train episode 2642: reward = 200.00, steps = 200\n",
      "00:03:54 [DEBUG] train episode 2643: reward = 147.00, steps = 147\n",
      "00:03:55 [DEBUG] train episode 2644: reward = 172.00, steps = 172\n",
      "00:03:55 [DEBUG] train episode 2645: reward = 134.00, steps = 134\n",
      "00:03:55 [DEBUG] train episode 2646: reward = 145.00, steps = 145\n",
      "00:03:55 [DEBUG] train episode 2647: reward = 152.00, steps = 152\n",
      "00:03:55 [DEBUG] train episode 2648: reward = 131.00, steps = 131\n",
      "00:03:55 [DEBUG] train episode 2649: reward = 200.00, steps = 200\n",
      "00:03:55 [DEBUG] train episode 2650: reward = 146.00, steps = 146\n",
      "00:03:55 [DEBUG] train episode 2651: reward = 120.00, steps = 120\n",
      "00:03:55 [DEBUG] train episode 2652: reward = 136.00, steps = 136\n",
      "00:03:55 [DEBUG] train episode 2653: reward = 143.00, steps = 143\n",
      "00:03:55 [DEBUG] train episode 2654: reward = 196.00, steps = 196\n",
      "00:03:55 [DEBUG] train episode 2655: reward = 165.00, steps = 165\n",
      "00:03:55 [DEBUG] train episode 2656: reward = 144.00, steps = 144\n",
      "00:03:55 [DEBUG] train episode 2657: reward = 179.00, steps = 179\n",
      "00:03:55 [DEBUG] train episode 2658: reward = 125.00, steps = 125\n",
      "00:03:55 [DEBUG] train episode 2659: reward = 185.00, steps = 185\n",
      "00:03:55 [DEBUG] train episode 2660: reward = 134.00, steps = 134\n",
      "00:03:55 [DEBUG] train episode 2661: reward = 159.00, steps = 159\n",
      "00:03:55 [DEBUG] train episode 2662: reward = 126.00, steps = 126\n",
      "00:03:55 [DEBUG] train episode 2663: reward = 103.00, steps = 103\n",
      "00:03:55 [DEBUG] train episode 2664: reward = 200.00, steps = 200\n",
      "00:03:56 [DEBUG] train episode 2665: reward = 183.00, steps = 183\n",
      "00:03:56 [DEBUG] train episode 2666: reward = 145.00, steps = 145\n",
      "00:03:56 [DEBUG] train episode 2667: reward = 198.00, steps = 198\n",
      "00:03:56 [DEBUG] train episode 2668: reward = 156.00, steps = 156\n",
      "00:03:56 [DEBUG] train episode 2669: reward = 132.00, steps = 132\n",
      "00:03:56 [DEBUG] train episode 2670: reward = 137.00, steps = 137\n",
      "00:03:56 [DEBUG] train episode 2671: reward = 145.00, steps = 145\n",
      "00:03:56 [DEBUG] train episode 2672: reward = 151.00, steps = 151\n",
      "00:03:56 [DEBUG] train episode 2673: reward = 200.00, steps = 200\n",
      "00:03:56 [DEBUG] train episode 2674: reward = 163.00, steps = 163\n",
      "00:03:56 [DEBUG] train episode 2675: reward = 185.00, steps = 185\n",
      "00:03:56 [DEBUG] train episode 2676: reward = 200.00, steps = 200\n",
      "00:03:56 [DEBUG] train episode 2677: reward = 146.00, steps = 146\n",
      "00:03:56 [DEBUG] train episode 2678: reward = 134.00, steps = 134\n",
      "00:03:56 [DEBUG] train episode 2679: reward = 162.00, steps = 162\n",
      "00:03:56 [DEBUG] train episode 2680: reward = 136.00, steps = 136\n",
      "00:03:56 [DEBUG] train episode 2681: reward = 198.00, steps = 198\n",
      "00:03:56 [DEBUG] train episode 2682: reward = 125.00, steps = 125\n",
      "00:03:56 [DEBUG] train episode 2683: reward = 120.00, steps = 120\n",
      "00:03:56 [DEBUG] train episode 2684: reward = 128.00, steps = 128\n",
      "00:03:56 [DEBUG] train episode 2685: reward = 143.00, steps = 143\n",
      "00:03:56 [DEBUG] train episode 2686: reward = 139.00, steps = 139\n",
      "00:03:57 [DEBUG] train episode 2687: reward = 139.00, steps = 139\n",
      "00:03:57 [DEBUG] train episode 2688: reward = 126.00, steps = 126\n",
      "00:03:57 [DEBUG] train episode 2689: reward = 146.00, steps = 146\n",
      "00:03:57 [DEBUG] train episode 2690: reward = 145.00, steps = 145\n",
      "00:03:57 [DEBUG] train episode 2691: reward = 200.00, steps = 200\n",
      "00:03:57 [DEBUG] train episode 2692: reward = 132.00, steps = 132\n",
      "00:03:57 [DEBUG] train episode 2693: reward = 173.00, steps = 173\n",
      "00:03:57 [DEBUG] train episode 2694: reward = 156.00, steps = 156\n",
      "00:03:57 [DEBUG] train episode 2695: reward = 137.00, steps = 137\n",
      "00:03:57 [DEBUG] train episode 2696: reward = 172.00, steps = 172\n",
      "00:03:57 [DEBUG] train episode 2697: reward = 177.00, steps = 177\n",
      "00:03:57 [DEBUG] train episode 2698: reward = 190.00, steps = 190\n",
      "00:03:57 [DEBUG] train episode 2699: reward = 162.00, steps = 162\n",
      "00:03:57 [DEBUG] train episode 2700: reward = 158.00, steps = 158\n",
      "00:03:57 [DEBUG] train episode 2701: reward = 141.00, steps = 141\n",
      "00:03:57 [DEBUG] train episode 2702: reward = 135.00, steps = 135\n",
      "00:03:57 [DEBUG] train episode 2703: reward = 146.00, steps = 146\n",
      "00:03:57 [DEBUG] train episode 2704: reward = 200.00, steps = 200\n",
      "00:03:57 [DEBUG] train episode 2705: reward = 178.00, steps = 178\n",
      "00:03:57 [DEBUG] train episode 2706: reward = 146.00, steps = 146\n",
      "00:03:57 [DEBUG] train episode 2707: reward = 181.00, steps = 181\n",
      "00:03:58 [DEBUG] train episode 2708: reward = 162.00, steps = 162\n",
      "00:03:58 [DEBUG] train episode 2709: reward = 135.00, steps = 135\n",
      "00:03:58 [DEBUG] train episode 2710: reward = 144.00, steps = 144\n",
      "00:03:58 [DEBUG] train episode 2711: reward = 200.00, steps = 200\n",
      "00:03:58 [DEBUG] train episode 2712: reward = 148.00, steps = 148\n",
      "00:03:58 [DEBUG] train episode 2713: reward = 196.00, steps = 196\n",
      "00:03:58 [DEBUG] train episode 2714: reward = 200.00, steps = 200\n",
      "00:03:58 [DEBUG] train episode 2715: reward = 182.00, steps = 182\n",
      "00:03:58 [DEBUG] train episode 2716: reward = 143.00, steps = 143\n",
      "00:03:58 [DEBUG] train episode 2717: reward = 200.00, steps = 200\n",
      "00:03:58 [DEBUG] train episode 2718: reward = 172.00, steps = 172\n",
      "00:03:58 [DEBUG] train episode 2719: reward = 185.00, steps = 185\n",
      "00:03:58 [DEBUG] train episode 2720: reward = 130.00, steps = 130\n",
      "00:03:58 [DEBUG] train episode 2721: reward = 200.00, steps = 200\n",
      "00:03:58 [DEBUG] train episode 2722: reward = 159.00, steps = 159\n",
      "00:03:58 [DEBUG] train episode 2723: reward = 200.00, steps = 200\n",
      "00:03:58 [DEBUG] train episode 2724: reward = 131.00, steps = 131\n",
      "00:03:58 [DEBUG] train episode 2725: reward = 164.00, steps = 164\n",
      "00:03:58 [DEBUG] train episode 2726: reward = 162.00, steps = 162\n",
      "00:03:58 [DEBUG] train episode 2727: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2728: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2729: reward = 197.00, steps = 197\n",
      "00:03:59 [DEBUG] train episode 2730: reward = 153.00, steps = 153\n",
      "00:03:59 [DEBUG] train episode 2731: reward = 175.00, steps = 175\n",
      "00:03:59 [DEBUG] train episode 2732: reward = 143.00, steps = 143\n",
      "00:03:59 [DEBUG] train episode 2733: reward = 164.00, steps = 164\n",
      "00:03:59 [DEBUG] train episode 2734: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2735: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2736: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2737: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2738: reward = 178.00, steps = 178\n",
      "00:03:59 [DEBUG] train episode 2739: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2740: reward = 132.00, steps = 132\n",
      "00:03:59 [DEBUG] train episode 2741: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2742: reward = 150.00, steps = 150\n",
      "00:03:59 [DEBUG] train episode 2743: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2744: reward = 200.00, steps = 200\n",
      "00:03:59 [DEBUG] train episode 2745: reward = 186.00, steps = 186\n",
      "00:04:00 [DEBUG] train episode 2746: reward = 194.00, steps = 194\n",
      "00:04:00 [DEBUG] train episode 2747: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2748: reward = 186.00, steps = 186\n",
      "00:04:00 [DEBUG] train episode 2749: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2750: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2751: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2752: reward = 144.00, steps = 144\n",
      "00:04:00 [DEBUG] train episode 2753: reward = 198.00, steps = 198\n",
      "00:04:00 [DEBUG] train episode 2754: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2755: reward = 192.00, steps = 192\n",
      "00:04:00 [DEBUG] train episode 2756: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2757: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2758: reward = 190.00, steps = 190\n",
      "00:04:00 [DEBUG] train episode 2759: reward = 187.00, steps = 187\n",
      "00:04:00 [DEBUG] train episode 2760: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2761: reward = 200.00, steps = 200\n",
      "00:04:00 [DEBUG] train episode 2762: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2763: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2764: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2765: reward = 197.00, steps = 197\n",
      "00:04:01 [DEBUG] train episode 2766: reward = 184.00, steps = 184\n",
      "00:04:01 [DEBUG] train episode 2767: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2768: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2769: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2770: reward = 194.00, steps = 194\n",
      "00:04:01 [DEBUG] train episode 2771: reward = 137.00, steps = 137\n",
      "00:04:01 [DEBUG] train episode 2772: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2773: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2774: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2775: reward = 190.00, steps = 190\n",
      "00:04:01 [DEBUG] train episode 2776: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2777: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2778: reward = 200.00, steps = 200\n",
      "00:04:01 [DEBUG] train episode 2779: reward = 176.00, steps = 176\n",
      "00:04:01 [DEBUG] train episode 2780: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2781: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2782: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2783: reward = 165.00, steps = 165\n",
      "00:04:02 [DEBUG] train episode 2784: reward = 199.00, steps = 199\n",
      "00:04:02 [DEBUG] train episode 2785: reward = 199.00, steps = 199\n",
      "00:04:02 [DEBUG] train episode 2786: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2787: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2788: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2789: reward = 144.00, steps = 144\n",
      "00:04:02 [DEBUG] train episode 2790: reward = 193.00, steps = 193\n",
      "00:04:02 [DEBUG] train episode 2791: reward = 160.00, steps = 160\n",
      "00:04:02 [DEBUG] train episode 2792: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2793: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2794: reward = 200.00, steps = 200\n",
      "00:04:02 [DEBUG] train episode 2795: reward = 164.00, steps = 164\n",
      "00:04:02 [DEBUG] train episode 2796: reward = 171.00, steps = 171\n",
      "00:04:02 [DEBUG] train episode 2797: reward = 200.00, steps = 200\n",
      "00:04:03 [DEBUG] train episode 2798: reward = 177.00, steps = 177\n",
      "00:04:03 [DEBUG] train episode 2799: reward = 200.00, steps = 200\n",
      "00:04:03 [DEBUG] train episode 2800: reward = 182.00, steps = 182\n",
      "00:04:03 [DEBUG] train episode 2801: reward = 144.00, steps = 144\n",
      "00:04:03 [DEBUG] train episode 2802: reward = 162.00, steps = 162\n",
      "00:04:03 [DEBUG] train episode 2803: reward = 200.00, steps = 200\n",
      "00:04:03 [DEBUG] train episode 2804: reward = 183.00, steps = 183\n",
      "00:04:03 [DEBUG] train episode 2805: reward = 200.00, steps = 200\n",
      "00:04:03 [DEBUG] train episode 2806: reward = 165.00, steps = 165\n",
      "00:04:03 [DEBUG] train episode 2807: reward = 200.00, steps = 200\n",
      "00:04:03 [DEBUG] train episode 2808: reward = 174.00, steps = 174\n",
      "00:04:03 [DEBUG] train episode 2809: reward = 200.00, steps = 200\n",
      "00:04:03 [DEBUG] train episode 2810: reward = 170.00, steps = 170\n",
      "00:04:03 [DEBUG] train episode 2811: reward = 157.00, steps = 157\n",
      "00:04:03 [DEBUG] train episode 2812: reward = 156.00, steps = 156\n",
      "00:04:03 [DEBUG] train episode 2813: reward = 156.00, steps = 156\n",
      "00:04:03 [DEBUG] train episode 2814: reward = 200.00, steps = 200\n",
      "00:04:03 [DEBUG] train episode 2815: reward = 183.00, steps = 183\n",
      "00:04:03 [DEBUG] train episode 2816: reward = 171.00, steps = 171\n",
      "00:04:04 [DEBUG] train episode 2817: reward = 180.00, steps = 180\n",
      "00:04:04 [DEBUG] train episode 2818: reward = 186.00, steps = 186\n",
      "00:04:04 [DEBUG] train episode 2819: reward = 153.00, steps = 153\n",
      "00:04:04 [DEBUG] train episode 2820: reward = 145.00, steps = 145\n",
      "00:04:04 [DEBUG] train episode 2821: reward = 194.00, steps = 194\n",
      "00:04:04 [DEBUG] train episode 2822: reward = 129.00, steps = 129\n",
      "00:04:04 [DEBUG] train episode 2823: reward = 198.00, steps = 198\n",
      "00:04:04 [DEBUG] train episode 2824: reward = 132.00, steps = 132\n",
      "00:04:04 [DEBUG] train episode 2825: reward = 164.00, steps = 164\n",
      "00:04:04 [DEBUG] train episode 2826: reward = 187.00, steps = 187\n",
      "00:04:04 [DEBUG] train episode 2827: reward = 110.00, steps = 110\n",
      "00:04:04 [DEBUG] train episode 2828: reward = 200.00, steps = 200\n",
      "00:04:04 [DEBUG] train episode 2829: reward = 187.00, steps = 187\n",
      "00:04:04 [DEBUG] train episode 2830: reward = 151.00, steps = 151\n",
      "00:04:04 [DEBUG] train episode 2831: reward = 158.00, steps = 158\n",
      "00:04:04 [DEBUG] train episode 2832: reward = 130.00, steps = 130\n",
      "00:04:04 [DEBUG] train episode 2833: reward = 149.00, steps = 149\n",
      "00:04:04 [DEBUG] train episode 2834: reward = 183.00, steps = 183\n",
      "00:04:04 [DEBUG] train episode 2835: reward = 135.00, steps = 135\n",
      "00:04:04 [DEBUG] train episode 2836: reward = 115.00, steps = 115\n",
      "00:04:05 [DEBUG] train episode 2837: reward = 151.00, steps = 151\n",
      "00:04:05 [DEBUG] train episode 2838: reward = 132.00, steps = 132\n",
      "00:04:05 [DEBUG] train episode 2839: reward = 174.00, steps = 174\n",
      "00:04:05 [DEBUG] train episode 2840: reward = 118.00, steps = 118\n",
      "00:04:05 [DEBUG] train episode 2841: reward = 139.00, steps = 139\n",
      "00:04:05 [DEBUG] train episode 2842: reward = 135.00, steps = 135\n",
      "00:04:05 [DEBUG] train episode 2843: reward = 134.00, steps = 134\n",
      "00:04:05 [DEBUG] train episode 2844: reward = 178.00, steps = 178\n",
      "00:04:05 [DEBUG] train episode 2845: reward = 137.00, steps = 137\n",
      "00:04:05 [DEBUG] train episode 2846: reward = 200.00, steps = 200\n",
      "00:04:05 [DEBUG] train episode 2847: reward = 137.00, steps = 137\n",
      "00:04:05 [DEBUG] train episode 2848: reward = 117.00, steps = 117\n",
      "00:04:05 [DEBUG] train episode 2849: reward = 180.00, steps = 180\n",
      "00:04:05 [DEBUG] train episode 2850: reward = 155.00, steps = 155\n",
      "00:04:05 [DEBUG] train episode 2851: reward = 150.00, steps = 150\n",
      "00:04:05 [DEBUG] train episode 2852: reward = 158.00, steps = 158\n",
      "00:04:05 [DEBUG] train episode 2853: reward = 162.00, steps = 162\n",
      "00:04:05 [DEBUG] train episode 2854: reward = 185.00, steps = 185\n",
      "00:04:05 [DEBUG] train episode 2855: reward = 109.00, steps = 109\n",
      "00:04:05 [DEBUG] train episode 2856: reward = 130.00, steps = 130\n",
      "00:04:05 [DEBUG] train episode 2857: reward = 162.00, steps = 162\n",
      "00:04:05 [DEBUG] train episode 2858: reward = 166.00, steps = 166\n",
      "00:04:06 [DEBUG] train episode 2859: reward = 125.00, steps = 125\n",
      "00:04:06 [DEBUG] train episode 2860: reward = 200.00, steps = 200\n",
      "00:04:06 [DEBUG] train episode 2861: reward = 164.00, steps = 164\n",
      "00:04:06 [DEBUG] train episode 2862: reward = 131.00, steps = 131\n",
      "00:04:06 [DEBUG] train episode 2863: reward = 135.00, steps = 135\n",
      "00:04:06 [DEBUG] train episode 2864: reward = 166.00, steps = 166\n",
      "00:04:06 [DEBUG] train episode 2865: reward = 138.00, steps = 138\n",
      "00:04:06 [DEBUG] train episode 2866: reward = 200.00, steps = 200\n",
      "00:04:06 [DEBUG] train episode 2867: reward = 184.00, steps = 184\n",
      "00:04:06 [DEBUG] train episode 2868: reward = 135.00, steps = 135\n",
      "00:04:06 [DEBUG] train episode 2869: reward = 131.00, steps = 131\n",
      "00:04:06 [DEBUG] train episode 2870: reward = 156.00, steps = 156\n",
      "00:04:06 [DEBUG] train episode 2871: reward = 139.00, steps = 139\n",
      "00:04:06 [DEBUG] train episode 2872: reward = 200.00, steps = 200\n",
      "00:04:06 [DEBUG] train episode 2873: reward = 200.00, steps = 200\n",
      "00:04:06 [DEBUG] train episode 2874: reward = 174.00, steps = 174\n",
      "00:04:06 [DEBUG] train episode 2875: reward = 161.00, steps = 161\n",
      "00:04:06 [DEBUG] train episode 2876: reward = 110.00, steps = 110\n",
      "00:04:06 [DEBUG] train episode 2877: reward = 110.00, steps = 110\n",
      "00:04:06 [DEBUG] train episode 2878: reward = 150.00, steps = 150\n",
      "00:04:06 [DEBUG] train episode 2879: reward = 140.00, steps = 140\n",
      "00:04:07 [DEBUG] train episode 2880: reward = 160.00, steps = 160\n",
      "00:04:07 [DEBUG] train episode 2881: reward = 168.00, steps = 168\n",
      "00:04:07 [DEBUG] train episode 2882: reward = 144.00, steps = 144\n",
      "00:04:07 [DEBUG] train episode 2883: reward = 167.00, steps = 167\n",
      "00:04:07 [DEBUG] train episode 2884: reward = 151.00, steps = 151\n",
      "00:04:07 [DEBUG] train episode 2885: reward = 131.00, steps = 131\n",
      "00:04:07 [DEBUG] train episode 2886: reward = 200.00, steps = 200\n",
      "00:04:07 [DEBUG] train episode 2887: reward = 135.00, steps = 135\n",
      "00:04:07 [DEBUG] train episode 2888: reward = 184.00, steps = 184\n",
      "00:04:07 [DEBUG] train episode 2889: reward = 160.00, steps = 160\n",
      "00:04:07 [DEBUG] train episode 2890: reward = 150.00, steps = 150\n",
      "00:04:07 [DEBUG] train episode 2891: reward = 169.00, steps = 169\n",
      "00:04:07 [DEBUG] train episode 2892: reward = 152.00, steps = 152\n",
      "00:04:07 [DEBUG] train episode 2893: reward = 161.00, steps = 161\n",
      "00:04:07 [DEBUG] train episode 2894: reward = 195.00, steps = 195\n",
      "00:04:07 [DEBUG] train episode 2895: reward = 144.00, steps = 144\n",
      "00:04:07 [DEBUG] train episode 2896: reward = 160.00, steps = 160\n",
      "00:04:07 [DEBUG] train episode 2897: reward = 163.00, steps = 163\n",
      "00:04:07 [DEBUG] train episode 2898: reward = 163.00, steps = 163\n",
      "00:04:07 [DEBUG] train episode 2899: reward = 171.00, steps = 171\n",
      "00:04:08 [DEBUG] train episode 2900: reward = 150.00, steps = 150\n",
      "00:04:08 [DEBUG] train episode 2901: reward = 200.00, steps = 200\n",
      "00:04:08 [DEBUG] train episode 2902: reward = 152.00, steps = 152\n",
      "00:04:08 [DEBUG] train episode 2903: reward = 185.00, steps = 185\n",
      "00:04:08 [DEBUG] train episode 2904: reward = 169.00, steps = 169\n",
      "00:04:08 [DEBUG] train episode 2905: reward = 200.00, steps = 200\n",
      "00:04:08 [DEBUG] train episode 2906: reward = 166.00, steps = 166\n",
      "00:04:08 [DEBUG] train episode 2907: reward = 200.00, steps = 200\n",
      "00:04:08 [DEBUG] train episode 2908: reward = 116.00, steps = 116\n",
      "00:04:08 [DEBUG] train episode 2909: reward = 112.00, steps = 112\n",
      "00:04:08 [DEBUG] train episode 2910: reward = 189.00, steps = 189\n",
      "00:04:08 [DEBUG] train episode 2911: reward = 180.00, steps = 180\n",
      "00:04:08 [DEBUG] train episode 2912: reward = 164.00, steps = 164\n",
      "00:04:08 [DEBUG] train episode 2913: reward = 158.00, steps = 158\n",
      "00:04:08 [DEBUG] train episode 2914: reward = 131.00, steps = 131\n",
      "00:04:08 [DEBUG] train episode 2915: reward = 200.00, steps = 200\n",
      "00:04:08 [DEBUG] train episode 2916: reward = 170.00, steps = 170\n",
      "00:04:08 [DEBUG] train episode 2917: reward = 120.00, steps = 120\n",
      "00:04:08 [DEBUG] train episode 2918: reward = 134.00, steps = 134\n",
      "00:04:08 [DEBUG] train episode 2919: reward = 160.00, steps = 160\n",
      "00:04:09 [DEBUG] train episode 2920: reward = 185.00, steps = 185\n",
      "00:04:09 [DEBUG] train episode 2921: reward = 142.00, steps = 142\n",
      "00:04:09 [DEBUG] train episode 2922: reward = 197.00, steps = 197\n",
      "00:04:09 [DEBUG] train episode 2923: reward = 140.00, steps = 140\n",
      "00:04:09 [DEBUG] train episode 2924: reward = 139.00, steps = 139\n",
      "00:04:09 [DEBUG] train episode 2925: reward = 106.00, steps = 106\n",
      "00:04:09 [DEBUG] train episode 2926: reward = 157.00, steps = 157\n",
      "00:04:09 [DEBUG] train episode 2927: reward = 200.00, steps = 200\n",
      "00:04:09 [DEBUG] train episode 2928: reward = 153.00, steps = 153\n",
      "00:04:09 [DEBUG] train episode 2929: reward = 193.00, steps = 193\n",
      "00:04:09 [DEBUG] train episode 2930: reward = 127.00, steps = 127\n",
      "00:04:09 [DEBUG] train episode 2931: reward = 114.00, steps = 114\n",
      "00:04:09 [DEBUG] train episode 2932: reward = 179.00, steps = 179\n",
      "00:04:09 [DEBUG] train episode 2933: reward = 139.00, steps = 139\n",
      "00:04:09 [DEBUG] train episode 2934: reward = 144.00, steps = 144\n",
      "00:04:09 [DEBUG] train episode 2935: reward = 189.00, steps = 189\n",
      "00:04:09 [DEBUG] train episode 2936: reward = 129.00, steps = 129\n",
      "00:04:09 [DEBUG] train episode 2937: reward = 143.00, steps = 143\n",
      "00:04:09 [DEBUG] train episode 2938: reward = 157.00, steps = 157\n",
      "00:04:09 [DEBUG] train episode 2939: reward = 146.00, steps = 146\n",
      "00:04:09 [DEBUG] train episode 2940: reward = 130.00, steps = 130\n",
      "00:04:09 [DEBUG] train episode 2941: reward = 138.00, steps = 138\n",
      "00:04:10 [DEBUG] train episode 2942: reward = 117.00, steps = 117\n",
      "00:04:10 [DEBUG] train episode 2943: reward = 168.00, steps = 168\n",
      "00:04:10 [DEBUG] train episode 2944: reward = 148.00, steps = 148\n",
      "00:04:10 [DEBUG] train episode 2945: reward = 158.00, steps = 158\n",
      "00:04:10 [DEBUG] train episode 2946: reward = 119.00, steps = 119\n",
      "00:04:10 [DEBUG] train episode 2947: reward = 200.00, steps = 200\n",
      "00:04:10 [DEBUG] train episode 2948: reward = 164.00, steps = 164\n",
      "00:04:10 [DEBUG] train episode 2949: reward = 146.00, steps = 146\n",
      "00:04:10 [DEBUG] train episode 2950: reward = 136.00, steps = 136\n",
      "00:04:10 [DEBUG] train episode 2951: reward = 155.00, steps = 155\n",
      "00:04:10 [DEBUG] train episode 2952: reward = 200.00, steps = 200\n",
      "00:04:10 [DEBUG] train episode 2953: reward = 168.00, steps = 168\n",
      "00:04:10 [DEBUG] train episode 2954: reward = 169.00, steps = 169\n",
      "00:04:10 [DEBUG] train episode 2955: reward = 198.00, steps = 198\n",
      "00:04:10 [DEBUG] train episode 2956: reward = 200.00, steps = 200\n",
      "00:04:10 [DEBUG] train episode 2957: reward = 200.00, steps = 200\n",
      "00:04:10 [DEBUG] train episode 2958: reward = 177.00, steps = 177\n",
      "00:04:10 [DEBUG] train episode 2959: reward = 148.00, steps = 148\n",
      "00:04:10 [DEBUG] train episode 2960: reward = 137.00, steps = 137\n",
      "00:04:10 [DEBUG] train episode 2961: reward = 157.00, steps = 157\n",
      "00:04:10 [DEBUG] train episode 2962: reward = 146.00, steps = 146\n",
      "00:04:11 [DEBUG] train episode 2963: reward = 117.00, steps = 117\n",
      "00:04:11 [DEBUG] train episode 2964: reward = 168.00, steps = 168\n",
      "00:04:11 [DEBUG] train episode 2965: reward = 200.00, steps = 200\n",
      "00:04:11 [DEBUG] train episode 2966: reward = 156.00, steps = 156\n",
      "00:04:11 [DEBUG] train episode 2967: reward = 134.00, steps = 134\n",
      "00:04:11 [DEBUG] train episode 2968: reward = 171.00, steps = 171\n",
      "00:04:11 [DEBUG] train episode 2969: reward = 140.00, steps = 140\n",
      "00:04:11 [DEBUG] train episode 2970: reward = 170.00, steps = 170\n",
      "00:04:11 [DEBUG] train episode 2971: reward = 155.00, steps = 155\n",
      "00:04:11 [DEBUG] train episode 2972: reward = 121.00, steps = 121\n",
      "00:04:11 [DEBUG] train episode 2973: reward = 200.00, steps = 200\n",
      "00:04:11 [DEBUG] train episode 2974: reward = 147.00, steps = 147\n",
      "00:04:11 [DEBUG] train episode 2975: reward = 119.00, steps = 119\n",
      "00:04:11 [DEBUG] train episode 2976: reward = 140.00, steps = 140\n",
      "00:04:11 [DEBUG] train episode 2977: reward = 200.00, steps = 200\n",
      "00:04:11 [DEBUG] train episode 2978: reward = 161.00, steps = 161\n",
      "00:04:11 [DEBUG] train episode 2979: reward = 154.00, steps = 154\n",
      "00:04:11 [DEBUG] train episode 2980: reward = 151.00, steps = 151\n",
      "00:04:11 [DEBUG] train episode 2981: reward = 153.00, steps = 153\n",
      "00:04:11 [DEBUG] train episode 2982: reward = 188.00, steps = 188\n",
      "00:04:11 [DEBUG] train episode 2983: reward = 174.00, steps = 174\n",
      "00:04:12 [DEBUG] train episode 2984: reward = 148.00, steps = 148\n",
      "00:04:12 [DEBUG] train episode 2985: reward = 163.00, steps = 163\n",
      "00:04:12 [DEBUG] train episode 2986: reward = 133.00, steps = 133\n",
      "00:04:12 [DEBUG] train episode 2987: reward = 200.00, steps = 200\n",
      "00:04:12 [DEBUG] train episode 2988: reward = 200.00, steps = 200\n",
      "00:04:12 [DEBUG] train episode 2989: reward = 200.00, steps = 200\n",
      "00:04:12 [DEBUG] train episode 2990: reward = 198.00, steps = 198\n",
      "00:04:12 [DEBUG] train episode 2991: reward = 169.00, steps = 169\n",
      "00:04:12 [DEBUG] train episode 2992: reward = 200.00, steps = 200\n",
      "00:04:12 [DEBUG] train episode 2993: reward = 130.00, steps = 130\n",
      "00:04:12 [DEBUG] train episode 2994: reward = 179.00, steps = 179\n",
      "00:04:12 [DEBUG] train episode 2995: reward = 142.00, steps = 142\n",
      "00:04:12 [DEBUG] train episode 2996: reward = 133.00, steps = 133\n",
      "00:04:12 [DEBUG] train episode 2997: reward = 147.00, steps = 147\n",
      "00:04:12 [DEBUG] train episode 2998: reward = 200.00, steps = 200\n",
      "00:04:12 [DEBUG] train episode 2999: reward = 200.00, steps = 200\n",
      "00:04:12 [DEBUG] train episode 3000: reward = 169.00, steps = 169\n",
      "00:04:12 [DEBUG] train episode 3001: reward = 163.00, steps = 163\n",
      "00:04:12 [DEBUG] train episode 3002: reward = 175.00, steps = 175\n",
      "00:04:12 [DEBUG] train episode 3003: reward = 194.00, steps = 194\n",
      "00:04:13 [DEBUG] train episode 3004: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3005: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3006: reward = 182.00, steps = 182\n",
      "00:04:13 [DEBUG] train episode 3007: reward = 168.00, steps = 168\n",
      "00:04:13 [DEBUG] train episode 3008: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3009: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3010: reward = 145.00, steps = 145\n",
      "00:04:13 [DEBUG] train episode 3011: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3012: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3013: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3014: reward = 176.00, steps = 176\n",
      "00:04:13 [DEBUG] train episode 3015: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3016: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3017: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3018: reward = 200.00, steps = 200\n",
      "00:04:13 [DEBUG] train episode 3019: reward = 172.00, steps = 172\n",
      "00:04:13 [DEBUG] train episode 3020: reward = 177.00, steps = 177\n",
      "00:04:13 [DEBUG] train episode 3021: reward = 122.00, steps = 122\n",
      "00:04:14 [DEBUG] train episode 3022: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3023: reward = 187.00, steps = 187\n",
      "00:04:14 [DEBUG] train episode 3024: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3025: reward = 178.00, steps = 178\n",
      "00:04:14 [DEBUG] train episode 3026: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3027: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3028: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3029: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3030: reward = 172.00, steps = 172\n",
      "00:04:14 [DEBUG] train episode 3031: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3032: reward = 154.00, steps = 154\n",
      "00:04:14 [DEBUG] train episode 3033: reward = 193.00, steps = 193\n",
      "00:04:14 [DEBUG] train episode 3034: reward = 159.00, steps = 159\n",
      "00:04:14 [DEBUG] train episode 3035: reward = 200.00, steps = 200\n",
      "00:04:14 [DEBUG] train episode 3036: reward = 195.00, steps = 195\n",
      "00:04:14 [DEBUG] train episode 3037: reward = 156.00, steps = 156\n",
      "00:04:14 [DEBUG] train episode 3038: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3039: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3040: reward = 175.00, steps = 175\n",
      "00:04:15 [DEBUG] train episode 3041: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3042: reward = 156.00, steps = 156\n",
      "00:04:15 [DEBUG] train episode 3043: reward = 136.00, steps = 136\n",
      "00:04:15 [DEBUG] train episode 3044: reward = 149.00, steps = 149\n",
      "00:04:15 [DEBUG] train episode 3045: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3046: reward = 150.00, steps = 150\n",
      "00:04:15 [DEBUG] train episode 3047: reward = 188.00, steps = 188\n",
      "00:04:15 [DEBUG] train episode 3048: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3049: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3050: reward = 188.00, steps = 188\n",
      "00:04:15 [DEBUG] train episode 3051: reward = 138.00, steps = 138\n",
      "00:04:15 [DEBUG] train episode 3052: reward = 148.00, steps = 148\n",
      "00:04:15 [DEBUG] train episode 3053: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3054: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3055: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3056: reward = 200.00, steps = 200\n",
      "00:04:15 [DEBUG] train episode 3057: reward = 175.00, steps = 175\n",
      "00:04:16 [DEBUG] train episode 3058: reward = 159.00, steps = 159\n",
      "00:04:16 [DEBUG] train episode 3059: reward = 152.00, steps = 152\n",
      "00:04:16 [DEBUG] train episode 3060: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3061: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3062: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3063: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3064: reward = 188.00, steps = 188\n",
      "00:04:16 [DEBUG] train episode 3065: reward = 154.00, steps = 154\n",
      "00:04:16 [DEBUG] train episode 3066: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3067: reward = 157.00, steps = 157\n",
      "00:04:16 [DEBUG] train episode 3068: reward = 163.00, steps = 163\n",
      "00:04:16 [DEBUG] train episode 3069: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3070: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3071: reward = 177.00, steps = 177\n",
      "00:04:16 [DEBUG] train episode 3072: reward = 138.00, steps = 138\n",
      "00:04:16 [DEBUG] train episode 3073: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3074: reward = 168.00, steps = 168\n",
      "00:04:16 [DEBUG] train episode 3075: reward = 200.00, steps = 200\n",
      "00:04:16 [DEBUG] train episode 3076: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3077: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3078: reward = 197.00, steps = 197\n",
      "00:04:17 [DEBUG] train episode 3079: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3080: reward = 190.00, steps = 190\n",
      "00:04:17 [DEBUG] train episode 3081: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3082: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3083: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3084: reward = 140.00, steps = 140\n",
      "00:04:17 [DEBUG] train episode 3085: reward = 175.00, steps = 175\n",
      "00:04:17 [DEBUG] train episode 3086: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3087: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3088: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3089: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3090: reward = 200.00, steps = 200\n",
      "00:04:17 [DEBUG] train episode 3091: reward = 138.00, steps = 138\n",
      "00:04:17 [DEBUG] train episode 3092: reward = 139.00, steps = 139\n",
      "00:04:17 [DEBUG] train episode 3093: reward = 130.00, steps = 130\n",
      "00:04:17 [DEBUG] train episode 3094: reward = 187.00, steps = 187\n",
      "00:04:18 [DEBUG] train episode 3095: reward = 200.00, steps = 200\n",
      "00:04:18 [DEBUG] train episode 3096: reward = 164.00, steps = 164\n",
      "00:04:18 [DEBUG] train episode 3097: reward = 200.00, steps = 200\n",
      "00:04:18 [DEBUG] train episode 3098: reward = 199.00, steps = 199\n",
      "00:04:18 [DEBUG] train episode 3099: reward = 192.00, steps = 192\n",
      "00:04:18 [DEBUG] train episode 3100: reward = 151.00, steps = 151\n",
      "00:04:18 [DEBUG] train episode 3101: reward = 129.00, steps = 129\n",
      "00:04:18 [DEBUG] train episode 3102: reward = 124.00, steps = 124\n",
      "00:04:18 [DEBUG] train episode 3103: reward = 200.00, steps = 200\n",
      "00:04:18 [DEBUG] train episode 3104: reward = 200.00, steps = 200\n",
      "00:04:18 [DEBUG] train episode 3105: reward = 200.00, steps = 200\n",
      "00:04:18 [DEBUG] train episode 3106: reward = 200.00, steps = 200\n",
      "00:04:18 [DEBUG] train episode 3107: reward = 138.00, steps = 138\n",
      "00:04:18 [DEBUG] train episode 3108: reward = 171.00, steps = 171\n",
      "00:04:18 [DEBUG] train episode 3109: reward = 200.00, steps = 200\n",
      "00:04:18 [DEBUG] train episode 3110: reward = 192.00, steps = 192\n",
      "00:04:18 [DEBUG] train episode 3111: reward = 200.00, steps = 200\n",
      "00:04:19 [DEBUG] train episode 3112: reward = 185.00, steps = 185\n",
      "00:04:19 [DEBUG] train episode 3113: reward = 200.00, steps = 200\n",
      "00:04:19 [DEBUG] train episode 3114: reward = 170.00, steps = 170\n",
      "00:04:19 [DEBUG] train episode 3115: reward = 161.00, steps = 161\n",
      "00:04:19 [DEBUG] train episode 3116: reward = 164.00, steps = 164\n",
      "00:04:19 [DEBUG] train episode 3117: reward = 191.00, steps = 191\n",
      "00:04:19 [DEBUG] train episode 3118: reward = 137.00, steps = 137\n",
      "00:04:19 [DEBUG] train episode 3119: reward = 200.00, steps = 200\n",
      "00:04:19 [DEBUG] train episode 3120: reward = 135.00, steps = 135\n",
      "00:04:19 [DEBUG] train episode 3121: reward = 150.00, steps = 150\n",
      "00:04:19 [DEBUG] train episode 3122: reward = 166.00, steps = 166\n",
      "00:04:19 [DEBUG] train episode 3123: reward = 141.00, steps = 141\n",
      "00:04:19 [DEBUG] train episode 3124: reward = 166.00, steps = 166\n",
      "00:04:19 [DEBUG] train episode 3125: reward = 159.00, steps = 159\n",
      "00:04:19 [DEBUG] train episode 3126: reward = 200.00, steps = 200\n",
      "00:04:19 [DEBUG] train episode 3127: reward = 129.00, steps = 129\n",
      "00:04:19 [DEBUG] train episode 3128: reward = 161.00, steps = 161\n",
      "00:04:19 [DEBUG] train episode 3129: reward = 189.00, steps = 189\n",
      "00:04:19 [DEBUG] train episode 3130: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3131: reward = 176.00, steps = 176\n",
      "00:04:20 [DEBUG] train episode 3132: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3133: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3134: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3135: reward = 159.00, steps = 159\n",
      "00:04:20 [DEBUG] train episode 3136: reward = 171.00, steps = 171\n",
      "00:04:20 [DEBUG] train episode 3137: reward = 128.00, steps = 128\n",
      "00:04:20 [DEBUG] train episode 3138: reward = 138.00, steps = 138\n",
      "00:04:20 [DEBUG] train episode 3139: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3140: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3141: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3142: reward = 134.00, steps = 134\n",
      "00:04:20 [DEBUG] train episode 3143: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3144: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3145: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3146: reward = 200.00, steps = 200\n",
      "00:04:20 [DEBUG] train episode 3147: reward = 162.00, steps = 162\n",
      "00:04:20 [DEBUG] train episode 3148: reward = 133.00, steps = 133\n",
      "00:04:21 [DEBUG] train episode 3149: reward = 200.00, steps = 200\n",
      "00:04:21 [DEBUG] train episode 3150: reward = 200.00, steps = 200\n",
      "00:04:21 [DEBUG] train episode 3151: reward = 170.00, steps = 170\n",
      "00:04:21 [DEBUG] train episode 3152: reward = 200.00, steps = 200\n",
      "00:04:21 [DEBUG] train episode 3153: reward = 158.00, steps = 158\n",
      "00:04:21 [DEBUG] train episode 3154: reward = 200.00, steps = 200\n",
      "00:04:21 [DEBUG] train episode 3155: reward = 200.00, steps = 200\n",
      "00:04:21 [DEBUG] train episode 3156: reward = 200.00, steps = 200\n",
      "00:04:21 [DEBUG] train episode 3157: reward = 200.00, steps = 200\n",
      "00:04:21 [DEBUG] train episode 3158: reward = 137.00, steps = 137\n",
      "00:04:21 [DEBUG] train episode 3159: reward = 175.00, steps = 175\n",
      "00:04:21 [DEBUG] train episode 3160: reward = 128.00, steps = 128\n",
      "00:04:21 [DEBUG] train episode 3161: reward = 142.00, steps = 142\n",
      "00:04:21 [DEBUG] train episode 3162: reward = 142.00, steps = 142\n",
      "00:04:21 [DEBUG] train episode 3163: reward = 160.00, steps = 160\n",
      "00:04:21 [DEBUG] train episode 3164: reward = 153.00, steps = 153\n",
      "00:04:21 [DEBUG] train episode 3165: reward = 154.00, steps = 154\n",
      "00:04:21 [DEBUG] train episode 3166: reward = 145.00, steps = 145\n",
      "00:04:21 [DEBUG] train episode 3167: reward = 135.00, steps = 135\n",
      "00:04:22 [DEBUG] train episode 3168: reward = 200.00, steps = 200\n",
      "00:04:22 [DEBUG] train episode 3169: reward = 162.00, steps = 162\n",
      "00:04:22 [DEBUG] train episode 3170: reward = 200.00, steps = 200\n",
      "00:04:22 [DEBUG] train episode 3171: reward = 149.00, steps = 149\n",
      "00:04:22 [DEBUG] train episode 3172: reward = 170.00, steps = 170\n",
      "00:04:22 [DEBUG] train episode 3173: reward = 182.00, steps = 182\n",
      "00:04:22 [DEBUG] train episode 3174: reward = 181.00, steps = 181\n",
      "00:04:22 [DEBUG] train episode 3175: reward = 120.00, steps = 120\n",
      "00:04:22 [DEBUG] train episode 3176: reward = 200.00, steps = 200\n",
      "00:04:22 [DEBUG] train episode 3177: reward = 176.00, steps = 176\n",
      "00:04:22 [DEBUG] train episode 3178: reward = 137.00, steps = 137\n",
      "00:04:22 [DEBUG] train episode 3179: reward = 111.00, steps = 111\n",
      "00:04:22 [DEBUG] train episode 3180: reward = 179.00, steps = 179\n",
      "00:04:22 [DEBUG] train episode 3181: reward = 128.00, steps = 128\n",
      "00:04:22 [DEBUG] train episode 3182: reward = 151.00, steps = 151\n",
      "00:04:22 [DEBUG] train episode 3183: reward = 143.00, steps = 143\n",
      "00:04:22 [DEBUG] train episode 3184: reward = 129.00, steps = 129\n",
      "00:04:22 [DEBUG] train episode 3185: reward = 127.00, steps = 127\n",
      "00:04:22 [DEBUG] train episode 3186: reward = 174.00, steps = 174\n",
      "00:04:22 [DEBUG] train episode 3187: reward = 111.00, steps = 111\n",
      "00:04:23 [DEBUG] train episode 3188: reward = 143.00, steps = 143\n",
      "00:04:23 [DEBUG] train episode 3189: reward = 195.00, steps = 195\n",
      "00:04:23 [DEBUG] train episode 3190: reward = 137.00, steps = 137\n",
      "00:04:23 [DEBUG] train episode 3191: reward = 199.00, steps = 199\n",
      "00:04:23 [DEBUG] train episode 3192: reward = 200.00, steps = 200\n",
      "00:04:23 [DEBUG] train episode 3193: reward = 200.00, steps = 200\n",
      "00:04:23 [DEBUG] train episode 3194: reward = 163.00, steps = 163\n",
      "00:04:23 [DEBUG] train episode 3195: reward = 173.00, steps = 173\n",
      "00:04:23 [DEBUG] train episode 3196: reward = 176.00, steps = 176\n",
      "00:04:23 [DEBUG] train episode 3197: reward = 138.00, steps = 138\n",
      "00:04:23 [DEBUG] train episode 3198: reward = 180.00, steps = 180\n",
      "00:04:23 [DEBUG] train episode 3199: reward = 151.00, steps = 151\n",
      "00:04:23 [DEBUG] train episode 3200: reward = 131.00, steps = 131\n",
      "00:04:23 [DEBUG] train episode 3201: reward = 137.00, steps = 137\n",
      "00:04:23 [DEBUG] train episode 3202: reward = 152.00, steps = 152\n",
      "00:04:23 [DEBUG] train episode 3203: reward = 139.00, steps = 139\n",
      "00:04:23 [DEBUG] train episode 3204: reward = 133.00, steps = 133\n",
      "00:04:23 [DEBUG] train episode 3205: reward = 200.00, steps = 200\n",
      "00:04:23 [DEBUG] train episode 3206: reward = 149.00, steps = 149\n",
      "00:04:23 [DEBUG] train episode 3207: reward = 152.00, steps = 152\n",
      "00:04:24 [DEBUG] train episode 3208: reward = 174.00, steps = 174\n",
      "00:04:24 [DEBUG] train episode 3209: reward = 200.00, steps = 200\n",
      "00:04:24 [DEBUG] train episode 3210: reward = 193.00, steps = 193\n",
      "00:04:24 [DEBUG] train episode 3211: reward = 200.00, steps = 200\n",
      "00:04:24 [DEBUG] train episode 3212: reward = 200.00, steps = 200\n",
      "00:04:24 [DEBUG] train episode 3213: reward = 131.00, steps = 131\n",
      "00:04:24 [DEBUG] train episode 3214: reward = 200.00, steps = 200\n",
      "00:04:24 [DEBUG] train episode 3215: reward = 200.00, steps = 200\n",
      "00:04:24 [DEBUG] train episode 3216: reward = 200.00, steps = 200\n",
      "00:04:24 [DEBUG] train episode 3217: reward = 142.00, steps = 142\n",
      "00:04:24 [DEBUG] train episode 3218: reward = 165.00, steps = 165\n",
      "00:04:24 [DEBUG] train episode 3219: reward = 135.00, steps = 135\n",
      "00:04:24 [DEBUG] train episode 3220: reward = 152.00, steps = 152\n",
      "00:04:24 [DEBUG] train episode 3221: reward = 152.00, steps = 152\n",
      "00:04:24 [DEBUG] train episode 3222: reward = 151.00, steps = 151\n",
      "00:04:24 [DEBUG] train episode 3223: reward = 200.00, steps = 200\n",
      "00:04:24 [DEBUG] train episode 3224: reward = 174.00, steps = 174\n",
      "00:04:24 [DEBUG] train episode 3225: reward = 181.00, steps = 181\n",
      "00:04:24 [DEBUG] train episode 3226: reward = 200.00, steps = 200\n",
      "00:04:25 [DEBUG] train episode 3227: reward = 166.00, steps = 166\n",
      "00:04:25 [DEBUG] train episode 3228: reward = 200.00, steps = 200\n",
      "00:04:25 [DEBUG] train episode 3229: reward = 194.00, steps = 194\n",
      "00:04:25 [DEBUG] train episode 3230: reward = 198.00, steps = 198\n",
      "00:04:25 [DEBUG] train episode 3231: reward = 168.00, steps = 168\n",
      "00:04:25 [DEBUG] train episode 3232: reward = 144.00, steps = 144\n",
      "00:04:25 [DEBUG] train episode 3233: reward = 200.00, steps = 200\n",
      "00:04:25 [DEBUG] train episode 3234: reward = 175.00, steps = 175\n",
      "00:04:25 [DEBUG] train episode 3235: reward = 147.00, steps = 147\n",
      "00:04:25 [DEBUG] train episode 3236: reward = 137.00, steps = 137\n",
      "00:04:25 [DEBUG] train episode 3237: reward = 200.00, steps = 200\n",
      "00:04:25 [DEBUG] train episode 3238: reward = 200.00, steps = 200\n",
      "00:04:25 [DEBUG] train episode 3239: reward = 163.00, steps = 163\n",
      "00:04:25 [DEBUG] train episode 3240: reward = 178.00, steps = 178\n",
      "00:04:25 [DEBUG] train episode 3241: reward = 200.00, steps = 200\n",
      "00:04:25 [DEBUG] train episode 3242: reward = 171.00, steps = 171\n",
      "00:04:25 [DEBUG] train episode 3243: reward = 200.00, steps = 200\n",
      "00:04:25 [DEBUG] train episode 3244: reward = 158.00, steps = 158\n",
      "00:04:26 [DEBUG] train episode 3245: reward = 160.00, steps = 160\n",
      "00:04:26 [DEBUG] train episode 3246: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3247: reward = 162.00, steps = 162\n",
      "00:04:26 [DEBUG] train episode 3248: reward = 149.00, steps = 149\n",
      "00:04:26 [DEBUG] train episode 3249: reward = 199.00, steps = 199\n",
      "00:04:26 [DEBUG] train episode 3250: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3251: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3252: reward = 145.00, steps = 145\n",
      "00:04:26 [DEBUG] train episode 3253: reward = 167.00, steps = 167\n",
      "00:04:26 [DEBUG] train episode 3254: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3255: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3256: reward = 166.00, steps = 166\n",
      "00:04:26 [DEBUG] train episode 3257: reward = 197.00, steps = 197\n",
      "00:04:26 [DEBUG] train episode 3258: reward = 155.00, steps = 155\n",
      "00:04:26 [DEBUG] train episode 3259: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3260: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3261: reward = 185.00, steps = 185\n",
      "00:04:26 [DEBUG] train episode 3262: reward = 200.00, steps = 200\n",
      "00:04:26 [DEBUG] train episode 3263: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3264: reward = 177.00, steps = 177\n",
      "00:04:27 [DEBUG] train episode 3265: reward = 168.00, steps = 168\n",
      "00:04:27 [DEBUG] train episode 3266: reward = 159.00, steps = 159\n",
      "00:04:27 [DEBUG] train episode 3267: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3268: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3269: reward = 162.00, steps = 162\n",
      "00:04:27 [DEBUG] train episode 3270: reward = 198.00, steps = 198\n",
      "00:04:27 [DEBUG] train episode 3271: reward = 129.00, steps = 129\n",
      "00:04:27 [DEBUG] train episode 3272: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3273: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3274: reward = 198.00, steps = 198\n",
      "00:04:27 [DEBUG] train episode 3275: reward = 156.00, steps = 156\n",
      "00:04:27 [DEBUG] train episode 3276: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3277: reward = 169.00, steps = 169\n",
      "00:04:27 [DEBUG] train episode 3278: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3279: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3280: reward = 200.00, steps = 200\n",
      "00:04:27 [DEBUG] train episode 3281: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3282: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3283: reward = 193.00, steps = 193\n",
      "00:04:28 [DEBUG] train episode 3284: reward = 172.00, steps = 172\n",
      "00:04:28 [DEBUG] train episode 3285: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3286: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3287: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3288: reward = 136.00, steps = 136\n",
      "00:04:28 [DEBUG] train episode 3289: reward = 192.00, steps = 192\n",
      "00:04:28 [DEBUG] train episode 3290: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3291: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3292: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3293: reward = 199.00, steps = 199\n",
      "00:04:28 [DEBUG] train episode 3294: reward = 186.00, steps = 186\n",
      "00:04:28 [DEBUG] train episode 3295: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3296: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3297: reward = 200.00, steps = 200\n",
      "00:04:28 [DEBUG] train episode 3298: reward = 144.00, steps = 144\n",
      "00:04:28 [DEBUG] train episode 3299: reward = 186.00, steps = 186\n",
      "00:04:29 [DEBUG] train episode 3300: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3301: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3302: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3303: reward = 198.00, steps = 198\n",
      "00:04:29 [DEBUG] train episode 3304: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3305: reward = 181.00, steps = 181\n",
      "00:04:29 [DEBUG] train episode 3306: reward = 176.00, steps = 176\n",
      "00:04:29 [DEBUG] train episode 3307: reward = 169.00, steps = 169\n",
      "00:04:29 [DEBUG] train episode 3308: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3309: reward = 158.00, steps = 158\n",
      "00:04:29 [DEBUG] train episode 3310: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3311: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3312: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3313: reward = 161.00, steps = 161\n",
      "00:04:29 [DEBUG] train episode 3314: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3315: reward = 200.00, steps = 200\n",
      "00:04:29 [DEBUG] train episode 3316: reward = 139.00, steps = 139\n",
      "00:04:29 [DEBUG] train episode 3317: reward = 200.00, steps = 200\n",
      "00:04:30 [DEBUG] train episode 3318: reward = 200.00, steps = 200\n",
      "00:04:30 [DEBUG] train episode 3319: reward = 163.00, steps = 163\n",
      "00:04:30 [DEBUG] train episode 3320: reward = 182.00, steps = 182\n",
      "00:04:30 [DEBUG] train episode 3321: reward = 161.00, steps = 161\n",
      "00:04:30 [DEBUG] train episode 3322: reward = 158.00, steps = 158\n",
      "00:04:30 [DEBUG] train episode 3323: reward = 200.00, steps = 200\n",
      "00:04:30 [DEBUG] train episode 3324: reward = 196.00, steps = 196\n",
      "00:04:30 [DEBUG] train episode 3325: reward = 165.00, steps = 165\n",
      "00:04:30 [DEBUG] train episode 3326: reward = 150.00, steps = 150\n",
      "00:04:30 [DEBUG] train episode 3327: reward = 159.00, steps = 159\n",
      "00:04:30 [DEBUG] train episode 3328: reward = 171.00, steps = 171\n",
      "00:04:30 [DEBUG] train episode 3329: reward = 180.00, steps = 180\n",
      "00:04:30 [DEBUG] train episode 3330: reward = 200.00, steps = 200\n",
      "00:04:30 [DEBUG] train episode 3331: reward = 154.00, steps = 154\n",
      "00:04:30 [DEBUG] train episode 3332: reward = 171.00, steps = 171\n",
      "00:04:30 [DEBUG] train episode 3333: reward = 169.00, steps = 169\n",
      "00:04:30 [DEBUG] train episode 3334: reward = 128.00, steps = 128\n",
      "00:04:30 [DEBUG] train episode 3335: reward = 200.00, steps = 200\n",
      "00:04:30 [DEBUG] train episode 3336: reward = 200.00, steps = 200\n",
      "00:04:31 [DEBUG] train episode 3337: reward = 200.00, steps = 200\n",
      "00:04:31 [DEBUG] train episode 3338: reward = 145.00, steps = 145\n",
      "00:04:31 [DEBUG] train episode 3339: reward = 155.00, steps = 155\n",
      "00:04:31 [DEBUG] train episode 3340: reward = 137.00, steps = 137\n",
      "00:04:31 [DEBUG] train episode 3341: reward = 180.00, steps = 180\n",
      "00:04:31 [DEBUG] train episode 3342: reward = 170.00, steps = 170\n",
      "00:04:31 [DEBUG] train episode 3343: reward = 177.00, steps = 177\n",
      "00:04:31 [DEBUG] train episode 3344: reward = 189.00, steps = 189\n",
      "00:04:31 [DEBUG] train episode 3345: reward = 141.00, steps = 141\n",
      "00:04:31 [DEBUG] train episode 3346: reward = 142.00, steps = 142\n",
      "00:04:31 [DEBUG] train episode 3347: reward = 112.00, steps = 112\n",
      "00:04:31 [DEBUG] train episode 3348: reward = 200.00, steps = 200\n",
      "00:04:31 [DEBUG] train episode 3349: reward = 149.00, steps = 149\n",
      "00:04:31 [DEBUG] train episode 3350: reward = 167.00, steps = 167\n",
      "00:04:31 [DEBUG] train episode 3351: reward = 193.00, steps = 193\n",
      "00:04:31 [DEBUG] train episode 3352: reward = 162.00, steps = 162\n",
      "00:04:31 [DEBUG] train episode 3353: reward = 184.00, steps = 184\n",
      "00:04:31 [DEBUG] train episode 3354: reward = 200.00, steps = 200\n",
      "00:04:31 [DEBUG] train episode 3355: reward = 193.00, steps = 193\n",
      "00:04:31 [DEBUG] train episode 3356: reward = 159.00, steps = 159\n",
      "00:04:32 [DEBUG] train episode 3357: reward = 200.00, steps = 200\n",
      "00:04:32 [DEBUG] train episode 3358: reward = 126.00, steps = 126\n",
      "00:04:32 [DEBUG] train episode 3359: reward = 200.00, steps = 200\n",
      "00:04:32 [DEBUG] train episode 3360: reward = 192.00, steps = 192\n",
      "00:04:32 [DEBUG] train episode 3361: reward = 197.00, steps = 197\n",
      "00:04:32 [DEBUG] train episode 3362: reward = 200.00, steps = 200\n",
      "00:04:32 [DEBUG] train episode 3363: reward = 148.00, steps = 148\n",
      "00:04:32 [DEBUG] train episode 3364: reward = 154.00, steps = 154\n",
      "00:04:32 [DEBUG] train episode 3365: reward = 136.00, steps = 136\n",
      "00:04:32 [DEBUG] train episode 3366: reward = 200.00, steps = 200\n",
      "00:04:32 [DEBUG] train episode 3367: reward = 158.00, steps = 158\n",
      "00:04:32 [DEBUG] train episode 3368: reward = 174.00, steps = 174\n",
      "00:04:32 [DEBUG] train episode 3369: reward = 132.00, steps = 132\n",
      "00:04:32 [DEBUG] train episode 3370: reward = 128.00, steps = 128\n",
      "00:04:32 [DEBUG] train episode 3371: reward = 192.00, steps = 192\n",
      "00:04:32 [DEBUG] train episode 3372: reward = 177.00, steps = 177\n",
      "00:04:32 [DEBUG] train episode 3373: reward = 186.00, steps = 186\n",
      "00:04:32 [DEBUG] train episode 3374: reward = 184.00, steps = 184\n",
      "00:04:32 [DEBUG] train episode 3375: reward = 141.00, steps = 141\n",
      "00:04:32 [DEBUG] train episode 3376: reward = 149.00, steps = 149\n",
      "00:04:33 [DEBUG] train episode 3377: reward = 186.00, steps = 186\n",
      "00:04:33 [DEBUG] train episode 3378: reward = 200.00, steps = 200\n",
      "00:04:33 [DEBUG] train episode 3379: reward = 187.00, steps = 187\n",
      "00:04:33 [DEBUG] train episode 3380: reward = 200.00, steps = 200\n",
      "00:04:33 [DEBUG] train episode 3381: reward = 137.00, steps = 137\n",
      "00:04:33 [DEBUG] train episode 3382: reward = 144.00, steps = 144\n",
      "00:04:33 [DEBUG] train episode 3383: reward = 200.00, steps = 200\n",
      "00:04:33 [DEBUG] train episode 3384: reward = 192.00, steps = 192\n",
      "00:04:33 [DEBUG] train episode 3385: reward = 126.00, steps = 126\n",
      "00:04:33 [DEBUG] train episode 3386: reward = 200.00, steps = 200\n",
      "00:04:33 [DEBUG] train episode 3387: reward = 200.00, steps = 200\n",
      "00:04:33 [DEBUG] train episode 3388: reward = 140.00, steps = 140\n",
      "00:04:33 [DEBUG] train episode 3389: reward = 157.00, steps = 157\n",
      "00:04:33 [DEBUG] train episode 3390: reward = 160.00, steps = 160\n",
      "00:04:33 [DEBUG] train episode 3391: reward = 145.00, steps = 145\n",
      "00:04:33 [DEBUG] train episode 3392: reward = 159.00, steps = 159\n",
      "00:04:33 [DEBUG] train episode 3393: reward = 155.00, steps = 155\n",
      "00:04:33 [DEBUG] train episode 3394: reward = 170.00, steps = 170\n",
      "00:04:33 [DEBUG] train episode 3395: reward = 200.00, steps = 200\n",
      "00:04:34 [DEBUG] train episode 3396: reward = 115.00, steps = 115\n",
      "00:04:34 [DEBUG] train episode 3397: reward = 149.00, steps = 149\n",
      "00:04:34 [DEBUG] train episode 3398: reward = 185.00, steps = 185\n",
      "00:04:34 [DEBUG] train episode 3399: reward = 161.00, steps = 161\n",
      "00:04:34 [DEBUG] train episode 3400: reward = 156.00, steps = 156\n",
      "00:04:34 [DEBUG] train episode 3401: reward = 163.00, steps = 163\n",
      "00:04:34 [DEBUG] train episode 3402: reward = 153.00, steps = 153\n",
      "00:04:34 [DEBUG] train episode 3403: reward = 107.00, steps = 107\n",
      "00:04:34 [DEBUG] train episode 3404: reward = 194.00, steps = 194\n",
      "00:04:34 [DEBUG] train episode 3405: reward = 178.00, steps = 178\n",
      "00:04:34 [DEBUG] train episode 3406: reward = 114.00, steps = 114\n",
      "00:04:34 [DEBUG] train episode 3407: reward = 182.00, steps = 182\n",
      "00:04:34 [DEBUG] train episode 3408: reward = 145.00, steps = 145\n",
      "00:04:34 [DEBUG] train episode 3409: reward = 130.00, steps = 130\n",
      "00:04:34 [DEBUG] train episode 3410: reward = 145.00, steps = 145\n",
      "00:04:34 [DEBUG] train episode 3411: reward = 200.00, steps = 200\n",
      "00:04:34 [DEBUG] train episode 3412: reward = 143.00, steps = 143\n",
      "00:04:34 [DEBUG] train episode 3413: reward = 148.00, steps = 148\n",
      "00:04:34 [DEBUG] train episode 3414: reward = 149.00, steps = 149\n",
      "00:04:34 [DEBUG] train episode 3415: reward = 200.00, steps = 200\n",
      "00:04:34 [DEBUG] train episode 3416: reward = 125.00, steps = 125\n",
      "00:04:35 [DEBUG] train episode 3417: reward = 139.00, steps = 139\n",
      "00:04:35 [DEBUG] train episode 3418: reward = 125.00, steps = 125\n",
      "00:04:35 [DEBUG] train episode 3419: reward = 198.00, steps = 198\n",
      "00:04:35 [DEBUG] train episode 3420: reward = 169.00, steps = 169\n",
      "00:04:35 [DEBUG] train episode 3421: reward = 130.00, steps = 130\n",
      "00:04:35 [DEBUG] train episode 3422: reward = 166.00, steps = 166\n",
      "00:04:35 [DEBUG] train episode 3423: reward = 135.00, steps = 135\n",
      "00:04:35 [DEBUG] train episode 3424: reward = 127.00, steps = 127\n",
      "00:04:35 [DEBUG] train episode 3425: reward = 196.00, steps = 196\n",
      "00:04:35 [DEBUG] train episode 3426: reward = 138.00, steps = 138\n",
      "00:04:35 [DEBUG] train episode 3427: reward = 180.00, steps = 180\n",
      "00:04:35 [DEBUG] train episode 3428: reward = 160.00, steps = 160\n",
      "00:04:35 [DEBUG] train episode 3429: reward = 122.00, steps = 122\n",
      "00:04:35 [DEBUG] train episode 3430: reward = 186.00, steps = 186\n",
      "00:04:35 [DEBUG] train episode 3431: reward = 126.00, steps = 126\n",
      "00:04:35 [DEBUG] train episode 3432: reward = 200.00, steps = 200\n",
      "00:04:35 [DEBUG] train episode 3433: reward = 200.00, steps = 200\n",
      "00:04:35 [DEBUG] train episode 3434: reward = 198.00, steps = 198\n",
      "00:04:35 [DEBUG] train episode 3435: reward = 200.00, steps = 200\n",
      "00:04:35 [DEBUG] train episode 3436: reward = 200.00, steps = 200\n",
      "00:04:36 [DEBUG] train episode 3437: reward = 200.00, steps = 200\n",
      "00:04:36 [DEBUG] train episode 3438: reward = 161.00, steps = 161\n",
      "00:04:36 [DEBUG] train episode 3439: reward = 154.00, steps = 154\n",
      "00:04:36 [DEBUG] train episode 3440: reward = 200.00, steps = 200\n",
      "00:04:36 [DEBUG] train episode 3441: reward = 200.00, steps = 200\n",
      "00:04:36 [DEBUG] train episode 3442: reward = 188.00, steps = 188\n",
      "00:04:36 [DEBUG] train episode 3443: reward = 167.00, steps = 167\n",
      "00:04:36 [DEBUG] train episode 3444: reward = 196.00, steps = 196\n",
      "00:04:36 [DEBUG] train episode 3445: reward = 185.00, steps = 185\n",
      "00:04:36 [DEBUG] train episode 3446: reward = 200.00, steps = 200\n",
      "00:04:36 [DEBUG] train episode 3447: reward = 166.00, steps = 166\n",
      "00:04:36 [DEBUG] train episode 3448: reward = 200.00, steps = 200\n",
      "00:04:36 [DEBUG] train episode 3449: reward = 169.00, steps = 169\n",
      "00:04:36 [DEBUG] train episode 3450: reward = 200.00, steps = 200\n",
      "00:04:36 [DEBUG] train episode 3451: reward = 164.00, steps = 164\n",
      "00:04:36 [DEBUG] train episode 3452: reward = 162.00, steps = 162\n",
      "00:04:36 [DEBUG] train episode 3453: reward = 143.00, steps = 143\n",
      "00:04:36 [DEBUG] train episode 3454: reward = 164.00, steps = 164\n",
      "00:04:37 [DEBUG] train episode 3455: reward = 139.00, steps = 139\n",
      "00:04:37 [DEBUG] train episode 3456: reward = 123.00, steps = 123\n",
      "00:04:37 [DEBUG] train episode 3457: reward = 132.00, steps = 132\n",
      "00:04:37 [DEBUG] train episode 3458: reward = 144.00, steps = 144\n",
      "00:04:37 [DEBUG] train episode 3459: reward = 139.00, steps = 139\n",
      "00:04:37 [DEBUG] train episode 3460: reward = 200.00, steps = 200\n",
      "00:04:37 [DEBUG] train episode 3461: reward = 149.00, steps = 149\n",
      "00:04:37 [DEBUG] train episode 3462: reward = 175.00, steps = 175\n",
      "00:04:37 [DEBUG] train episode 3463: reward = 200.00, steps = 200\n",
      "00:04:37 [DEBUG] train episode 3464: reward = 200.00, steps = 200\n",
      "00:04:37 [DEBUG] train episode 3465: reward = 159.00, steps = 159\n",
      "00:04:37 [DEBUG] train episode 3466: reward = 134.00, steps = 134\n",
      "00:04:37 [DEBUG] train episode 3467: reward = 200.00, steps = 200\n",
      "00:04:37 [DEBUG] train episode 3468: reward = 152.00, steps = 152\n",
      "00:04:37 [DEBUG] train episode 3469: reward = 149.00, steps = 149\n",
      "00:04:37 [DEBUG] train episode 3470: reward = 198.00, steps = 198\n",
      "00:04:37 [DEBUG] train episode 3471: reward = 200.00, steps = 200\n",
      "00:04:37 [DEBUG] train episode 3472: reward = 137.00, steps = 137\n",
      "00:04:37 [DEBUG] train episode 3473: reward = 200.00, steps = 200\n",
      "00:04:37 [DEBUG] train episode 3474: reward = 125.00, steps = 125\n",
      "00:04:38 [DEBUG] train episode 3475: reward = 153.00, steps = 153\n",
      "00:04:38 [DEBUG] train episode 3476: reward = 166.00, steps = 166\n",
      "00:04:38 [DEBUG] train episode 3477: reward = 161.00, steps = 161\n",
      "00:04:38 [DEBUG] train episode 3478: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3479: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3480: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3481: reward = 178.00, steps = 178\n",
      "00:04:38 [DEBUG] train episode 3482: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3483: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3484: reward = 174.00, steps = 174\n",
      "00:04:38 [DEBUG] train episode 3485: reward = 179.00, steps = 179\n",
      "00:04:38 [DEBUG] train episode 3486: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3487: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3488: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3489: reward = 178.00, steps = 178\n",
      "00:04:38 [DEBUG] train episode 3490: reward = 158.00, steps = 158\n",
      "00:04:38 [DEBUG] train episode 3491: reward = 139.00, steps = 139\n",
      "00:04:38 [DEBUG] train episode 3492: reward = 200.00, steps = 200\n",
      "00:04:38 [DEBUG] train episode 3493: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3494: reward = 145.00, steps = 145\n",
      "00:04:39 [DEBUG] train episode 3495: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3496: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3497: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3498: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3499: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3500: reward = 154.00, steps = 154\n",
      "00:04:39 [DEBUG] train episode 3501: reward = 195.00, steps = 195\n",
      "00:04:39 [DEBUG] train episode 3502: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3503: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3504: reward = 150.00, steps = 150\n",
      "00:04:39 [DEBUG] train episode 3505: reward = 154.00, steps = 154\n",
      "00:04:39 [DEBUG] train episode 3506: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3507: reward = 131.00, steps = 131\n",
      "00:04:39 [DEBUG] train episode 3508: reward = 200.00, steps = 200\n",
      "00:04:39 [DEBUG] train episode 3509: reward = 179.00, steps = 179\n",
      "00:04:39 [DEBUG] train episode 3510: reward = 149.00, steps = 149\n",
      "00:04:39 [DEBUG] train episode 3511: reward = 200.00, steps = 200\n",
      "00:04:40 [DEBUG] train episode 3512: reward = 200.00, steps = 200\n",
      "00:04:40 [DEBUG] train episode 3513: reward = 200.00, steps = 200\n",
      "00:04:40 [DEBUG] train episode 3514: reward = 200.00, steps = 200\n",
      "00:04:40 [DEBUG] train episode 3515: reward = 171.00, steps = 171\n",
      "00:04:40 [DEBUG] train episode 3516: reward = 165.00, steps = 165\n",
      "00:04:40 [DEBUG] train episode 3517: reward = 143.00, steps = 143\n",
      "00:04:40 [DEBUG] train episode 3518: reward = 151.00, steps = 151\n",
      "00:04:40 [DEBUG] train episode 3519: reward = 134.00, steps = 134\n",
      "00:04:40 [DEBUG] train episode 3520: reward = 200.00, steps = 200\n",
      "00:04:40 [DEBUG] train episode 3521: reward = 153.00, steps = 153\n",
      "00:04:40 [DEBUG] train episode 3522: reward = 185.00, steps = 185\n",
      "00:04:40 [DEBUG] train episode 3523: reward = 175.00, steps = 175\n",
      "00:04:40 [DEBUG] train episode 3524: reward = 195.00, steps = 195\n",
      "00:04:40 [DEBUG] train episode 3525: reward = 200.00, steps = 200\n",
      "00:04:40 [DEBUG] train episode 3526: reward = 138.00, steps = 138\n",
      "00:04:40 [DEBUG] train episode 3527: reward = 131.00, steps = 131\n",
      "00:04:40 [DEBUG] train episode 3528: reward = 200.00, steps = 200\n",
      "00:04:40 [DEBUG] train episode 3529: reward = 153.00, steps = 153\n",
      "00:04:40 [DEBUG] train episode 3530: reward = 147.00, steps = 147\n",
      "00:04:41 [DEBUG] train episode 3531: reward = 161.00, steps = 161\n",
      "00:04:41 [DEBUG] train episode 3532: reward = 200.00, steps = 200\n",
      "00:04:41 [DEBUG] train episode 3533: reward = 134.00, steps = 134\n",
      "00:04:41 [DEBUG] train episode 3534: reward = 200.00, steps = 200\n",
      "00:04:41 [DEBUG] train episode 3535: reward = 196.00, steps = 196\n",
      "00:04:41 [DEBUG] train episode 3536: reward = 200.00, steps = 200\n",
      "00:04:41 [DEBUG] train episode 3537: reward = 161.00, steps = 161\n",
      "00:04:41 [DEBUG] train episode 3538: reward = 200.00, steps = 200\n",
      "00:04:41 [DEBUG] train episode 3539: reward = 190.00, steps = 190\n",
      "00:04:41 [DEBUG] train episode 3540: reward = 200.00, steps = 200\n",
      "00:04:41 [DEBUG] train episode 3541: reward = 167.00, steps = 167\n",
      "00:04:41 [DEBUG] train episode 3542: reward = 176.00, steps = 176\n",
      "00:04:41 [DEBUG] train episode 3543: reward = 173.00, steps = 173\n",
      "00:04:41 [DEBUG] train episode 3544: reward = 200.00, steps = 200\n",
      "00:04:41 [DEBUG] train episode 3545: reward = 135.00, steps = 135\n",
      "00:04:41 [DEBUG] train episode 3546: reward = 200.00, steps = 200\n",
      "00:04:41 [DEBUG] train episode 3547: reward = 121.00, steps = 121\n",
      "00:04:41 [DEBUG] train episode 3548: reward = 200.00, steps = 200\n",
      "00:04:42 [DEBUG] train episode 3549: reward = 137.00, steps = 137\n",
      "00:04:42 [DEBUG] train episode 3550: reward = 139.00, steps = 139\n",
      "00:04:42 [DEBUG] train episode 3551: reward = 200.00, steps = 200\n",
      "00:04:42 [DEBUG] train episode 3552: reward = 200.00, steps = 200\n",
      "00:04:42 [DEBUG] train episode 3553: reward = 200.00, steps = 200\n",
      "00:04:42 [DEBUG] train episode 3554: reward = 177.00, steps = 177\n",
      "00:04:42 [DEBUG] train episode 3555: reward = 160.00, steps = 160\n",
      "00:04:42 [DEBUG] train episode 3556: reward = 184.00, steps = 184\n",
      "00:04:42 [DEBUG] train episode 3557: reward = 148.00, steps = 148\n",
      "00:04:42 [DEBUG] train episode 3558: reward = 138.00, steps = 138\n",
      "00:04:42 [DEBUG] train episode 3559: reward = 120.00, steps = 120\n",
      "00:04:42 [DEBUG] train episode 3560: reward = 137.00, steps = 137\n",
      "00:04:42 [DEBUG] train episode 3561: reward = 200.00, steps = 200\n",
      "00:04:42 [DEBUG] train episode 3562: reward = 158.00, steps = 158\n",
      "00:04:42 [DEBUG] train episode 3563: reward = 143.00, steps = 143\n",
      "00:04:42 [DEBUG] train episode 3564: reward = 182.00, steps = 182\n",
      "00:04:42 [DEBUG] train episode 3565: reward = 138.00, steps = 138\n",
      "00:04:42 [DEBUG] train episode 3566: reward = 200.00, steps = 200\n",
      "00:04:42 [DEBUG] train episode 3567: reward = 128.00, steps = 128\n",
      "00:04:42 [DEBUG] train episode 3568: reward = 163.00, steps = 163\n",
      "00:04:42 [DEBUG] train episode 3569: reward = 171.00, steps = 171\n",
      "00:04:43 [DEBUG] train episode 3570: reward = 131.00, steps = 131\n",
      "00:04:43 [DEBUG] train episode 3571: reward = 140.00, steps = 140\n",
      "00:04:43 [DEBUG] train episode 3572: reward = 136.00, steps = 136\n",
      "00:04:43 [DEBUG] train episode 3573: reward = 140.00, steps = 140\n",
      "00:04:43 [DEBUG] train episode 3574: reward = 117.00, steps = 117\n",
      "00:04:43 [DEBUG] train episode 3575: reward = 120.00, steps = 120\n",
      "00:04:43 [DEBUG] train episode 3576: reward = 200.00, steps = 200\n",
      "00:04:43 [DEBUG] train episode 3577: reward = 112.00, steps = 112\n",
      "00:04:43 [DEBUG] train episode 3578: reward = 151.00, steps = 151\n",
      "00:04:43 [DEBUG] train episode 3579: reward = 117.00, steps = 117\n",
      "00:04:43 [DEBUG] train episode 3580: reward = 174.00, steps = 174\n",
      "00:04:43 [DEBUG] train episode 3581: reward = 111.00, steps = 111\n",
      "00:04:43 [DEBUG] train episode 3582: reward = 137.00, steps = 137\n",
      "00:04:43 [DEBUG] train episode 3583: reward = 125.00, steps = 125\n",
      "00:04:43 [DEBUG] train episode 3584: reward = 140.00, steps = 140\n",
      "00:04:43 [DEBUG] train episode 3585: reward = 192.00, steps = 192\n",
      "00:04:43 [DEBUG] train episode 3586: reward = 177.00, steps = 177\n",
      "00:04:43 [DEBUG] train episode 3587: reward = 194.00, steps = 194\n",
      "00:04:43 [DEBUG] train episode 3588: reward = 126.00, steps = 126\n",
      "00:04:43 [DEBUG] train episode 3589: reward = 185.00, steps = 185\n",
      "00:04:43 [DEBUG] train episode 3590: reward = 148.00, steps = 148\n",
      "00:04:43 [DEBUG] train episode 3591: reward = 200.00, steps = 200\n",
      "00:04:44 [DEBUG] train episode 3592: reward = 110.00, steps = 110\n",
      "00:04:44 [DEBUG] train episode 3593: reward = 174.00, steps = 174\n",
      "00:04:44 [DEBUG] train episode 3594: reward = 166.00, steps = 166\n",
      "00:04:44 [DEBUG] train episode 3595: reward = 130.00, steps = 130\n",
      "00:04:44 [DEBUG] train episode 3596: reward = 101.00, steps = 101\n",
      "00:04:44 [DEBUG] train episode 3597: reward = 200.00, steps = 200\n",
      "00:04:44 [DEBUG] train episode 3598: reward = 136.00, steps = 136\n",
      "00:04:44 [DEBUG] train episode 3599: reward = 108.00, steps = 108\n",
      "00:04:44 [DEBUG] train episode 3600: reward = 168.00, steps = 168\n",
      "00:04:44 [DEBUG] train episode 3601: reward = 124.00, steps = 124\n",
      "00:04:44 [DEBUG] train episode 3602: reward = 118.00, steps = 118\n",
      "00:04:44 [DEBUG] train episode 3603: reward = 200.00, steps = 200\n",
      "00:04:44 [DEBUG] train episode 3604: reward = 184.00, steps = 184\n",
      "00:04:44 [DEBUG] train episode 3605: reward = 135.00, steps = 135\n",
      "00:04:44 [DEBUG] train episode 3606: reward = 168.00, steps = 168\n",
      "00:04:44 [DEBUG] train episode 3607: reward = 112.00, steps = 112\n",
      "00:04:44 [DEBUG] train episode 3608: reward = 145.00, steps = 145\n",
      "00:04:44 [DEBUG] train episode 3609: reward = 141.00, steps = 141\n",
      "00:04:44 [DEBUG] train episode 3610: reward = 159.00, steps = 159\n",
      "00:04:44 [DEBUG] train episode 3611: reward = 98.00, steps = 98\n",
      "00:04:44 [DEBUG] train episode 3612: reward = 118.00, steps = 118\n",
      "00:04:44 [DEBUG] train episode 3613: reward = 107.00, steps = 107\n",
      "00:04:44 [DEBUG] train episode 3614: reward = 100.00, steps = 100\n",
      "00:04:44 [DEBUG] train episode 3615: reward = 113.00, steps = 113\n",
      "00:04:45 [DEBUG] train episode 3616: reward = 162.00, steps = 162\n",
      "00:04:45 [DEBUG] train episode 3617: reward = 124.00, steps = 124\n",
      "00:04:45 [DEBUG] train episode 3618: reward = 143.00, steps = 143\n",
      "00:04:45 [DEBUG] train episode 3619: reward = 128.00, steps = 128\n",
      "00:04:45 [DEBUG] train episode 3620: reward = 104.00, steps = 104\n",
      "00:04:45 [DEBUG] train episode 3621: reward = 140.00, steps = 140\n",
      "00:04:45 [DEBUG] train episode 3622: reward = 200.00, steps = 200\n",
      "00:04:45 [DEBUG] train episode 3623: reward = 197.00, steps = 197\n",
      "00:04:45 [DEBUG] train episode 3624: reward = 117.00, steps = 117\n",
      "00:04:45 [DEBUG] train episode 3625: reward = 135.00, steps = 135\n",
      "00:04:45 [DEBUG] train episode 3626: reward = 115.00, steps = 115\n",
      "00:04:45 [DEBUG] train episode 3627: reward = 116.00, steps = 116\n",
      "00:04:45 [DEBUG] train episode 3628: reward = 200.00, steps = 200\n",
      "00:04:45 [DEBUG] train episode 3629: reward = 138.00, steps = 138\n",
      "00:04:45 [DEBUG] train episode 3630: reward = 180.00, steps = 180\n",
      "00:04:45 [DEBUG] train episode 3631: reward = 155.00, steps = 155\n",
      "00:04:45 [DEBUG] train episode 3632: reward = 120.00, steps = 120\n",
      "00:04:45 [DEBUG] train episode 3633: reward = 111.00, steps = 111\n",
      "00:04:45 [DEBUG] train episode 3634: reward = 184.00, steps = 184\n",
      "00:04:45 [DEBUG] train episode 3635: reward = 115.00, steps = 115\n",
      "00:04:45 [DEBUG] train episode 3636: reward = 131.00, steps = 131\n",
      "00:04:45 [DEBUG] train episode 3637: reward = 142.00, steps = 142\n",
      "00:04:46 [DEBUG] train episode 3638: reward = 187.00, steps = 187\n",
      "00:04:46 [DEBUG] train episode 3639: reward = 120.00, steps = 120\n",
      "00:04:46 [DEBUG] train episode 3640: reward = 92.00, steps = 92\n",
      "00:04:46 [DEBUG] train episode 3641: reward = 200.00, steps = 200\n",
      "00:04:46 [DEBUG] train episode 3642: reward = 119.00, steps = 119\n",
      "00:04:46 [DEBUG] train episode 3643: reward = 123.00, steps = 123\n",
      "00:04:46 [DEBUG] train episode 3644: reward = 155.00, steps = 155\n",
      "00:04:46 [DEBUG] train episode 3645: reward = 141.00, steps = 141\n",
      "00:04:46 [DEBUG] train episode 3646: reward = 200.00, steps = 200\n",
      "00:04:46 [DEBUG] train episode 3647: reward = 128.00, steps = 128\n",
      "00:04:46 [DEBUG] train episode 3648: reward = 200.00, steps = 200\n",
      "00:04:46 [DEBUG] train episode 3649: reward = 144.00, steps = 144\n",
      "00:04:46 [DEBUG] train episode 3650: reward = 147.00, steps = 147\n",
      "00:04:46 [DEBUG] train episode 3651: reward = 147.00, steps = 147\n",
      "00:04:46 [DEBUG] train episode 3652: reward = 125.00, steps = 125\n",
      "00:04:46 [DEBUG] train episode 3653: reward = 130.00, steps = 130\n",
      "00:04:46 [DEBUG] train episode 3654: reward = 136.00, steps = 136\n",
      "00:04:46 [DEBUG] train episode 3655: reward = 135.00, steps = 135\n",
      "00:04:46 [DEBUG] train episode 3656: reward = 200.00, steps = 200\n",
      "00:04:46 [DEBUG] train episode 3657: reward = 113.00, steps = 113\n",
      "00:04:46 [DEBUG] train episode 3658: reward = 133.00, steps = 133\n",
      "00:04:46 [DEBUG] train episode 3659: reward = 163.00, steps = 163\n",
      "00:04:46 [DEBUG] train episode 3660: reward = 177.00, steps = 177\n",
      "00:04:47 [DEBUG] train episode 3661: reward = 200.00, steps = 200\n",
      "00:04:47 [DEBUG] train episode 3662: reward = 153.00, steps = 153\n",
      "00:04:47 [DEBUG] train episode 3663: reward = 135.00, steps = 135\n",
      "00:04:47 [DEBUG] train episode 3664: reward = 117.00, steps = 117\n",
      "00:04:47 [DEBUG] train episode 3665: reward = 91.00, steps = 91\n",
      "00:04:47 [DEBUG] train episode 3666: reward = 164.00, steps = 164\n",
      "00:04:47 [DEBUG] train episode 3667: reward = 145.00, steps = 145\n",
      "00:04:47 [DEBUG] train episode 3668: reward = 126.00, steps = 126\n",
      "00:04:47 [DEBUG] train episode 3669: reward = 110.00, steps = 110\n",
      "00:04:47 [DEBUG] train episode 3670: reward = 151.00, steps = 151\n",
      "00:04:47 [DEBUG] train episode 3671: reward = 157.00, steps = 157\n",
      "00:04:47 [DEBUG] train episode 3672: reward = 123.00, steps = 123\n",
      "00:04:47 [DEBUG] train episode 3673: reward = 120.00, steps = 120\n",
      "00:04:47 [DEBUG] train episode 3674: reward = 115.00, steps = 115\n",
      "00:04:47 [DEBUG] train episode 3675: reward = 200.00, steps = 200\n",
      "00:04:47 [DEBUG] train episode 3676: reward = 138.00, steps = 138\n",
      "00:04:47 [DEBUG] train episode 3677: reward = 109.00, steps = 109\n",
      "00:04:47 [DEBUG] train episode 3678: reward = 200.00, steps = 200\n",
      "00:04:47 [DEBUG] train episode 3679: reward = 110.00, steps = 110\n",
      "00:04:47 [DEBUG] train episode 3680: reward = 149.00, steps = 149\n",
      "00:04:47 [DEBUG] train episode 3681: reward = 130.00, steps = 130\n",
      "00:04:47 [DEBUG] train episode 3682: reward = 100.00, steps = 100\n",
      "00:04:47 [DEBUG] train episode 3683: reward = 92.00, steps = 92\n",
      "00:04:48 [DEBUG] train episode 3684: reward = 132.00, steps = 132\n",
      "00:04:48 [DEBUG] train episode 3685: reward = 150.00, steps = 150\n",
      "00:04:48 [DEBUG] train episode 3686: reward = 115.00, steps = 115\n",
      "00:04:48 [DEBUG] train episode 3687: reward = 128.00, steps = 128\n",
      "00:04:48 [DEBUG] train episode 3688: reward = 197.00, steps = 197\n",
      "00:04:48 [DEBUG] train episode 3689: reward = 98.00, steps = 98\n",
      "00:04:48 [DEBUG] train episode 3690: reward = 200.00, steps = 200\n",
      "00:04:48 [DEBUG] train episode 3691: reward = 105.00, steps = 105\n",
      "00:04:48 [DEBUG] train episode 3692: reward = 200.00, steps = 200\n",
      "00:04:48 [DEBUG] train episode 3693: reward = 113.00, steps = 113\n",
      "00:04:48 [DEBUG] train episode 3694: reward = 166.00, steps = 166\n",
      "00:04:48 [DEBUG] train episode 3695: reward = 130.00, steps = 130\n",
      "00:04:48 [DEBUG] train episode 3696: reward = 101.00, steps = 101\n",
      "00:04:48 [DEBUG] train episode 3697: reward = 97.00, steps = 97\n",
      "00:04:48 [DEBUG] train episode 3698: reward = 132.00, steps = 132\n",
      "00:04:48 [DEBUG] train episode 3699: reward = 138.00, steps = 138\n",
      "00:04:48 [DEBUG] train episode 3700: reward = 120.00, steps = 120\n",
      "00:04:48 [DEBUG] train episode 3701: reward = 171.00, steps = 171\n",
      "00:04:48 [DEBUG] train episode 3702: reward = 188.00, steps = 188\n",
      "00:04:48 [DEBUG] train episode 3703: reward = 117.00, steps = 117\n",
      "00:04:48 [DEBUG] train episode 3704: reward = 109.00, steps = 109\n",
      "00:04:48 [DEBUG] train episode 3705: reward = 200.00, steps = 200\n",
      "00:04:48 [DEBUG] train episode 3706: reward = 154.00, steps = 154\n",
      "00:04:49 [DEBUG] train episode 3707: reward = 129.00, steps = 129\n",
      "00:04:49 [DEBUG] train episode 3708: reward = 147.00, steps = 147\n",
      "00:04:49 [DEBUG] train episode 3709: reward = 124.00, steps = 124\n",
      "00:04:49 [DEBUG] train episode 3710: reward = 141.00, steps = 141\n",
      "00:04:49 [DEBUG] train episode 3711: reward = 127.00, steps = 127\n",
      "00:04:49 [DEBUG] train episode 3712: reward = 200.00, steps = 200\n",
      "00:04:49 [DEBUG] train episode 3713: reward = 123.00, steps = 123\n",
      "00:04:49 [DEBUG] train episode 3714: reward = 103.00, steps = 103\n",
      "00:04:49 [DEBUG] train episode 3715: reward = 120.00, steps = 120\n",
      "00:04:49 [DEBUG] train episode 3716: reward = 115.00, steps = 115\n",
      "00:04:49 [DEBUG] train episode 3717: reward = 98.00, steps = 98\n",
      "00:04:49 [DEBUG] train episode 3718: reward = 173.00, steps = 173\n",
      "00:04:49 [DEBUG] train episode 3719: reward = 137.00, steps = 137\n",
      "00:04:49 [DEBUG] train episode 3720: reward = 120.00, steps = 120\n",
      "00:04:49 [DEBUG] train episode 3721: reward = 103.00, steps = 103\n",
      "00:04:49 [DEBUG] train episode 3722: reward = 90.00, steps = 90\n",
      "00:04:49 [DEBUG] train episode 3723: reward = 103.00, steps = 103\n",
      "00:04:49 [DEBUG] train episode 3724: reward = 127.00, steps = 127\n",
      "00:04:49 [DEBUG] train episode 3725: reward = 162.00, steps = 162\n",
      "00:04:49 [DEBUG] train episode 3726: reward = 102.00, steps = 102\n",
      "00:04:49 [DEBUG] train episode 3727: reward = 96.00, steps = 96\n",
      "00:04:49 [DEBUG] train episode 3728: reward = 114.00, steps = 114\n",
      "00:04:49 [DEBUG] train episode 3729: reward = 156.00, steps = 156\n",
      "00:04:49 [DEBUG] train episode 3730: reward = 99.00, steps = 99\n",
      "00:04:49 [DEBUG] train episode 3731: reward = 123.00, steps = 123\n",
      "00:04:49 [DEBUG] train episode 3732: reward = 196.00, steps = 196\n",
      "00:04:50 [DEBUG] train episode 3733: reward = 142.00, steps = 142\n",
      "00:04:50 [DEBUG] train episode 3734: reward = 110.00, steps = 110\n",
      "00:04:50 [DEBUG] train episode 3735: reward = 93.00, steps = 93\n",
      "00:04:50 [DEBUG] train episode 3736: reward = 119.00, steps = 119\n",
      "00:04:50 [DEBUG] train episode 3737: reward = 113.00, steps = 113\n",
      "00:04:50 [DEBUG] train episode 3738: reward = 124.00, steps = 124\n",
      "00:04:50 [DEBUG] train episode 3739: reward = 84.00, steps = 84\n",
      "00:04:50 [DEBUG] train episode 3740: reward = 181.00, steps = 181\n",
      "00:04:50 [DEBUG] train episode 3741: reward = 115.00, steps = 115\n",
      "00:04:50 [DEBUG] train episode 3742: reward = 93.00, steps = 93\n",
      "00:04:50 [DEBUG] train episode 3743: reward = 110.00, steps = 110\n",
      "00:04:50 [DEBUG] train episode 3744: reward = 200.00, steps = 200\n",
      "00:04:50 [DEBUG] train episode 3745: reward = 108.00, steps = 108\n",
      "00:04:50 [DEBUG] train episode 3746: reward = 198.00, steps = 198\n",
      "00:04:50 [DEBUG] train episode 3747: reward = 93.00, steps = 93\n",
      "00:04:50 [DEBUG] train episode 3748: reward = 111.00, steps = 111\n",
      "00:04:50 [DEBUG] train episode 3749: reward = 127.00, steps = 127\n",
      "00:04:50 [DEBUG] train episode 3750: reward = 200.00, steps = 200\n",
      "00:04:50 [DEBUG] train episode 3751: reward = 124.00, steps = 124\n",
      "00:04:50 [DEBUG] train episode 3752: reward = 138.00, steps = 138\n",
      "00:04:50 [DEBUG] train episode 3753: reward = 200.00, steps = 200\n",
      "00:04:50 [DEBUG] train episode 3754: reward = 136.00, steps = 136\n",
      "00:04:50 [DEBUG] train episode 3755: reward = 104.00, steps = 104\n",
      "00:04:50 [DEBUG] train episode 3756: reward = 92.00, steps = 92\n",
      "00:04:50 [DEBUG] train episode 3757: reward = 200.00, steps = 200\n",
      "00:04:51 [DEBUG] train episode 3758: reward = 111.00, steps = 111\n",
      "00:04:51 [DEBUG] train episode 3759: reward = 200.00, steps = 200\n",
      "00:04:51 [DEBUG] train episode 3760: reward = 158.00, steps = 158\n",
      "00:04:51 [DEBUG] train episode 3761: reward = 137.00, steps = 137\n",
      "00:04:51 [DEBUG] train episode 3762: reward = 122.00, steps = 122\n",
      "00:04:51 [DEBUG] train episode 3763: reward = 102.00, steps = 102\n",
      "00:04:51 [DEBUG] train episode 3764: reward = 174.00, steps = 174\n",
      "00:04:51 [DEBUG] train episode 3765: reward = 122.00, steps = 122\n",
      "00:04:51 [DEBUG] train episode 3766: reward = 114.00, steps = 114\n",
      "00:04:51 [DEBUG] train episode 3767: reward = 156.00, steps = 156\n",
      "00:04:51 [DEBUG] train episode 3768: reward = 120.00, steps = 120\n",
      "00:04:51 [DEBUG] train episode 3769: reward = 154.00, steps = 154\n",
      "00:04:51 [DEBUG] train episode 3770: reward = 149.00, steps = 149\n",
      "00:04:51 [DEBUG] train episode 3771: reward = 138.00, steps = 138\n",
      "00:04:51 [DEBUG] train episode 3772: reward = 100.00, steps = 100\n",
      "00:04:51 [DEBUG] train episode 3773: reward = 139.00, steps = 139\n",
      "00:04:51 [DEBUG] train episode 3774: reward = 115.00, steps = 115\n",
      "00:04:51 [DEBUG] train episode 3775: reward = 110.00, steps = 110\n",
      "00:04:51 [DEBUG] train episode 3776: reward = 131.00, steps = 131\n",
      "00:04:51 [DEBUG] train episode 3777: reward = 140.00, steps = 140\n",
      "00:04:51 [DEBUG] train episode 3778: reward = 110.00, steps = 110\n",
      "00:04:51 [DEBUG] train episode 3779: reward = 142.00, steps = 142\n",
      "00:04:51 [DEBUG] train episode 3780: reward = 200.00, steps = 200\n",
      "00:04:51 [DEBUG] train episode 3781: reward = 128.00, steps = 128\n",
      "00:04:52 [DEBUG] train episode 3782: reward = 148.00, steps = 148\n",
      "00:04:52 [DEBUG] train episode 3783: reward = 133.00, steps = 133\n",
      "00:04:52 [DEBUG] train episode 3784: reward = 158.00, steps = 158\n",
      "00:04:52 [DEBUG] train episode 3785: reward = 190.00, steps = 190\n",
      "00:04:52 [DEBUG] train episode 3786: reward = 172.00, steps = 172\n",
      "00:04:52 [DEBUG] train episode 3787: reward = 165.00, steps = 165\n",
      "00:04:52 [DEBUG] train episode 3788: reward = 179.00, steps = 179\n",
      "00:04:52 [DEBUG] train episode 3789: reward = 139.00, steps = 139\n",
      "00:04:52 [DEBUG] train episode 3790: reward = 200.00, steps = 200\n",
      "00:04:52 [DEBUG] train episode 3791: reward = 123.00, steps = 123\n",
      "00:04:52 [DEBUG] train episode 3792: reward = 160.00, steps = 160\n",
      "00:04:52 [DEBUG] train episode 3793: reward = 181.00, steps = 181\n",
      "00:04:52 [DEBUG] train episode 3794: reward = 121.00, steps = 121\n",
      "00:04:52 [DEBUG] train episode 3795: reward = 115.00, steps = 115\n",
      "00:04:52 [DEBUG] train episode 3796: reward = 124.00, steps = 124\n",
      "00:04:52 [DEBUG] train episode 3797: reward = 154.00, steps = 154\n",
      "00:04:52 [DEBUG] train episode 3798: reward = 157.00, steps = 157\n",
      "00:04:52 [DEBUG] train episode 3799: reward = 158.00, steps = 158\n",
      "00:04:52 [DEBUG] train episode 3800: reward = 177.00, steps = 177\n",
      "00:04:52 [DEBUG] train episode 3801: reward = 139.00, steps = 139\n",
      "00:04:52 [DEBUG] train episode 3802: reward = 162.00, steps = 162\n",
      "00:04:52 [DEBUG] train episode 3803: reward = 130.00, steps = 130\n",
      "00:04:53 [DEBUG] train episode 3804: reward = 181.00, steps = 181\n",
      "00:04:53 [DEBUG] train episode 3805: reward = 109.00, steps = 109\n",
      "00:04:53 [DEBUG] train episode 3806: reward = 108.00, steps = 108\n",
      "00:04:53 [DEBUG] train episode 3807: reward = 140.00, steps = 140\n",
      "00:04:53 [DEBUG] train episode 3808: reward = 131.00, steps = 131\n",
      "00:04:53 [DEBUG] train episode 3809: reward = 177.00, steps = 177\n",
      "00:04:53 [DEBUG] train episode 3810: reward = 127.00, steps = 127\n",
      "00:04:53 [DEBUG] train episode 3811: reward = 125.00, steps = 125\n",
      "00:04:53 [DEBUG] train episode 3812: reward = 103.00, steps = 103\n",
      "00:04:53 [DEBUG] train episode 3813: reward = 197.00, steps = 197\n",
      "00:04:53 [DEBUG] train episode 3814: reward = 200.00, steps = 200\n",
      "00:04:53 [DEBUG] train episode 3815: reward = 123.00, steps = 123\n",
      "00:04:53 [DEBUG] train episode 3816: reward = 113.00, steps = 113\n",
      "00:04:53 [DEBUG] train episode 3817: reward = 166.00, steps = 166\n",
      "00:04:53 [DEBUG] train episode 3818: reward = 124.00, steps = 124\n",
      "00:04:53 [DEBUG] train episode 3819: reward = 188.00, steps = 188\n",
      "00:04:53 [DEBUG] train episode 3820: reward = 159.00, steps = 159\n",
      "00:04:53 [DEBUG] train episode 3821: reward = 156.00, steps = 156\n",
      "00:04:53 [DEBUG] train episode 3822: reward = 137.00, steps = 137\n",
      "00:04:53 [DEBUG] train episode 3823: reward = 153.00, steps = 153\n",
      "00:04:53 [DEBUG] train episode 3824: reward = 124.00, steps = 124\n",
      "00:04:53 [DEBUG] train episode 3825: reward = 200.00, steps = 200\n",
      "00:04:54 [DEBUG] train episode 3826: reward = 121.00, steps = 121\n",
      "00:04:54 [DEBUG] train episode 3827: reward = 127.00, steps = 127\n",
      "00:04:54 [DEBUG] train episode 3828: reward = 200.00, steps = 200\n",
      "00:04:54 [DEBUG] train episode 3829: reward = 166.00, steps = 166\n",
      "00:04:54 [DEBUG] train episode 3830: reward = 164.00, steps = 164\n",
      "00:04:54 [DEBUG] train episode 3831: reward = 162.00, steps = 162\n",
      "00:04:54 [DEBUG] train episode 3832: reward = 181.00, steps = 181\n",
      "00:04:54 [DEBUG] train episode 3833: reward = 179.00, steps = 179\n",
      "00:04:54 [DEBUG] train episode 3834: reward = 184.00, steps = 184\n",
      "00:04:54 [DEBUG] train episode 3835: reward = 164.00, steps = 164\n",
      "00:04:54 [DEBUG] train episode 3836: reward = 200.00, steps = 200\n",
      "00:04:54 [DEBUG] train episode 3837: reward = 167.00, steps = 167\n",
      "00:04:54 [DEBUG] train episode 3838: reward = 200.00, steps = 200\n",
      "00:04:54 [DEBUG] train episode 3839: reward = 200.00, steps = 200\n",
      "00:04:54 [DEBUG] train episode 3840: reward = 161.00, steps = 161\n",
      "00:04:54 [DEBUG] train episode 3841: reward = 200.00, steps = 200\n",
      "00:04:54 [DEBUG] train episode 3842: reward = 141.00, steps = 141\n",
      "00:04:54 [DEBUG] train episode 3843: reward = 156.00, steps = 156\n",
      "00:04:54 [DEBUG] train episode 3844: reward = 130.00, steps = 130\n",
      "00:04:54 [DEBUG] train episode 3845: reward = 167.00, steps = 167\n",
      "00:04:55 [DEBUG] train episode 3846: reward = 157.00, steps = 157\n",
      "00:04:55 [DEBUG] train episode 3847: reward = 165.00, steps = 165\n",
      "00:04:55 [DEBUG] train episode 3848: reward = 133.00, steps = 133\n",
      "00:04:55 [DEBUG] train episode 3849: reward = 146.00, steps = 146\n",
      "00:04:55 [DEBUG] train episode 3850: reward = 180.00, steps = 180\n",
      "00:04:55 [DEBUG] train episode 3851: reward = 160.00, steps = 160\n",
      "00:04:55 [DEBUG] train episode 3852: reward = 140.00, steps = 140\n",
      "00:04:55 [DEBUG] train episode 3853: reward = 163.00, steps = 163\n",
      "00:04:55 [DEBUG] train episode 3854: reward = 143.00, steps = 143\n",
      "00:04:55 [DEBUG] train episode 3855: reward = 146.00, steps = 146\n",
      "00:04:55 [DEBUG] train episode 3856: reward = 200.00, steps = 200\n",
      "00:04:55 [DEBUG] train episode 3857: reward = 200.00, steps = 200\n",
      "00:04:55 [DEBUG] train episode 3858: reward = 156.00, steps = 156\n",
      "00:04:55 [DEBUG] train episode 3859: reward = 200.00, steps = 200\n",
      "00:04:55 [DEBUG] train episode 3860: reward = 169.00, steps = 169\n",
      "00:04:55 [DEBUG] train episode 3861: reward = 180.00, steps = 180\n",
      "00:04:55 [DEBUG] train episode 3862: reward = 200.00, steps = 200\n",
      "00:04:55 [DEBUG] train episode 3863: reward = 200.00, steps = 200\n",
      "00:04:55 [DEBUG] train episode 3864: reward = 200.00, steps = 200\n",
      "00:04:56 [DEBUG] train episode 3865: reward = 110.00, steps = 110\n",
      "00:04:56 [DEBUG] train episode 3866: reward = 152.00, steps = 152\n",
      "00:04:56 [DEBUG] train episode 3867: reward = 182.00, steps = 182\n",
      "00:04:56 [DEBUG] train episode 3868: reward = 157.00, steps = 157\n",
      "00:04:56 [DEBUG] train episode 3869: reward = 167.00, steps = 167\n",
      "00:04:56 [DEBUG] train episode 3870: reward = 141.00, steps = 141\n",
      "00:04:56 [DEBUG] train episode 3871: reward = 159.00, steps = 159\n",
      "00:04:56 [DEBUG] train episode 3872: reward = 141.00, steps = 141\n",
      "00:04:56 [DEBUG] train episode 3873: reward = 200.00, steps = 200\n",
      "00:04:56 [DEBUG] train episode 3874: reward = 200.00, steps = 200\n",
      "00:04:56 [DEBUG] train episode 3875: reward = 115.00, steps = 115\n",
      "00:04:56 [DEBUG] train episode 3876: reward = 200.00, steps = 200\n",
      "00:04:56 [DEBUG] train episode 3877: reward = 159.00, steps = 159\n",
      "00:04:56 [DEBUG] train episode 3878: reward = 108.00, steps = 108\n",
      "00:04:56 [DEBUG] train episode 3879: reward = 119.00, steps = 119\n",
      "00:04:56 [DEBUG] train episode 3880: reward = 200.00, steps = 200\n",
      "00:04:56 [DEBUG] train episode 3881: reward = 142.00, steps = 142\n",
      "00:04:56 [DEBUG] train episode 3882: reward = 180.00, steps = 180\n",
      "00:04:56 [DEBUG] train episode 3883: reward = 126.00, steps = 126\n",
      "00:04:56 [DEBUG] train episode 3884: reward = 200.00, steps = 200\n",
      "00:04:56 [DEBUG] train episode 3885: reward = 141.00, steps = 141\n",
      "00:04:56 [DEBUG] train episode 3886: reward = 146.00, steps = 146\n",
      "00:04:57 [DEBUG] train episode 3887: reward = 106.00, steps = 106\n",
      "00:04:57 [DEBUG] train episode 3888: reward = 154.00, steps = 154\n",
      "00:04:57 [DEBUG] train episode 3889: reward = 147.00, steps = 147\n",
      "00:04:57 [DEBUG] train episode 3890: reward = 148.00, steps = 148\n",
      "00:04:57 [DEBUG] train episode 3891: reward = 152.00, steps = 152\n",
      "00:04:57 [DEBUG] train episode 3892: reward = 178.00, steps = 178\n",
      "00:04:57 [DEBUG] train episode 3893: reward = 133.00, steps = 133\n",
      "00:04:57 [DEBUG] train episode 3894: reward = 143.00, steps = 143\n",
      "00:04:57 [DEBUG] train episode 3895: reward = 154.00, steps = 154\n",
      "00:04:57 [DEBUG] train episode 3896: reward = 138.00, steps = 138\n",
      "00:04:57 [DEBUG] train episode 3897: reward = 173.00, steps = 173\n",
      "00:04:57 [DEBUG] train episode 3898: reward = 112.00, steps = 112\n",
      "00:04:57 [DEBUG] train episode 3899: reward = 128.00, steps = 128\n",
      "00:04:57 [DEBUG] train episode 3900: reward = 177.00, steps = 177\n",
      "00:04:57 [DEBUG] train episode 3901: reward = 146.00, steps = 146\n",
      "00:04:57 [DEBUG] train episode 3902: reward = 200.00, steps = 200\n",
      "00:04:57 [DEBUG] train episode 3903: reward = 200.00, steps = 200\n",
      "00:04:57 [DEBUG] train episode 3904: reward = 136.00, steps = 136\n",
      "00:04:57 [DEBUG] train episode 3905: reward = 153.00, steps = 153\n",
      "00:04:57 [DEBUG] train episode 3906: reward = 150.00, steps = 150\n",
      "00:04:57 [DEBUG] train episode 3907: reward = 134.00, steps = 134\n",
      "00:04:58 [DEBUG] train episode 3908: reward = 197.00, steps = 197\n",
      "00:04:58 [DEBUG] train episode 3909: reward = 172.00, steps = 172\n",
      "00:04:58 [DEBUG] train episode 3910: reward = 200.00, steps = 200\n",
      "00:04:58 [DEBUG] train episode 3911: reward = 167.00, steps = 167\n",
      "00:04:58 [DEBUG] train episode 3912: reward = 174.00, steps = 174\n",
      "00:04:58 [DEBUG] train episode 3913: reward = 157.00, steps = 157\n",
      "00:04:58 [DEBUG] train episode 3914: reward = 144.00, steps = 144\n",
      "00:04:58 [DEBUG] train episode 3915: reward = 200.00, steps = 200\n",
      "00:04:58 [DEBUG] train episode 3916: reward = 150.00, steps = 150\n",
      "00:04:58 [DEBUG] train episode 3917: reward = 116.00, steps = 116\n",
      "00:04:58 [DEBUG] train episode 3918: reward = 170.00, steps = 170\n",
      "00:04:58 [DEBUG] train episode 3919: reward = 143.00, steps = 143\n",
      "00:04:58 [DEBUG] train episode 3920: reward = 200.00, steps = 200\n",
      "00:04:58 [DEBUG] train episode 3921: reward = 196.00, steps = 196\n",
      "00:04:58 [DEBUG] train episode 3922: reward = 155.00, steps = 155\n",
      "00:04:58 [DEBUG] train episode 3923: reward = 156.00, steps = 156\n",
      "00:04:58 [DEBUG] train episode 3924: reward = 163.00, steps = 163\n",
      "00:04:58 [DEBUG] train episode 3925: reward = 183.00, steps = 183\n",
      "00:04:58 [DEBUG] train episode 3926: reward = 144.00, steps = 144\n",
      "00:04:58 [DEBUG] train episode 3927: reward = 128.00, steps = 128\n",
      "00:04:58 [DEBUG] train episode 3928: reward = 148.00, steps = 148\n",
      "00:04:59 [DEBUG] train episode 3929: reward = 148.00, steps = 148\n",
      "00:04:59 [DEBUG] train episode 3930: reward = 159.00, steps = 159\n",
      "00:04:59 [DEBUG] train episode 3931: reward = 171.00, steps = 171\n",
      "00:04:59 [DEBUG] train episode 3932: reward = 200.00, steps = 200\n",
      "00:04:59 [DEBUG] train episode 3933: reward = 130.00, steps = 130\n",
      "00:04:59 [DEBUG] train episode 3934: reward = 177.00, steps = 177\n",
      "00:04:59 [DEBUG] train episode 3935: reward = 128.00, steps = 128\n",
      "00:04:59 [DEBUG] train episode 3936: reward = 200.00, steps = 200\n",
      "00:04:59 [DEBUG] train episode 3937: reward = 142.00, steps = 142\n",
      "00:04:59 [DEBUG] train episode 3938: reward = 148.00, steps = 148\n",
      "00:04:59 [DEBUG] train episode 3939: reward = 174.00, steps = 174\n",
      "00:04:59 [DEBUG] train episode 3940: reward = 200.00, steps = 200\n",
      "00:04:59 [DEBUG] train episode 3941: reward = 141.00, steps = 141\n",
      "00:04:59 [DEBUG] train episode 3942: reward = 171.00, steps = 171\n",
      "00:04:59 [DEBUG] train episode 3943: reward = 199.00, steps = 199\n",
      "00:04:59 [DEBUG] train episode 3944: reward = 147.00, steps = 147\n",
      "00:04:59 [DEBUG] train episode 3945: reward = 147.00, steps = 147\n",
      "00:04:59 [DEBUG] train episode 3946: reward = 135.00, steps = 135\n",
      "00:04:59 [DEBUG] train episode 3947: reward = 147.00, steps = 147\n",
      "00:05:00 [DEBUG] train episode 3948: reward = 138.00, steps = 138\n",
      "00:05:00 [DEBUG] train episode 3949: reward = 132.00, steps = 132\n",
      "00:05:00 [DEBUG] train episode 3950: reward = 200.00, steps = 200\n",
      "00:05:00 [DEBUG] train episode 3951: reward = 117.00, steps = 117\n",
      "00:05:00 [DEBUG] train episode 3952: reward = 200.00, steps = 200\n",
      "00:05:00 [DEBUG] train episode 3953: reward = 150.00, steps = 150\n",
      "00:05:00 [DEBUG] train episode 3954: reward = 151.00, steps = 151\n",
      "00:05:00 [DEBUG] train episode 3955: reward = 145.00, steps = 145\n",
      "00:05:00 [DEBUG] train episode 3956: reward = 155.00, steps = 155\n",
      "00:05:00 [DEBUG] train episode 3957: reward = 139.00, steps = 139\n",
      "00:05:00 [DEBUG] train episode 3958: reward = 183.00, steps = 183\n",
      "00:05:00 [DEBUG] train episode 3959: reward = 169.00, steps = 169\n",
      "00:05:00 [DEBUG] train episode 3960: reward = 152.00, steps = 152\n",
      "00:05:00 [DEBUG] train episode 3961: reward = 146.00, steps = 146\n",
      "00:05:00 [DEBUG] train episode 3962: reward = 200.00, steps = 200\n",
      "00:05:00 [DEBUG] train episode 3963: reward = 146.00, steps = 146\n",
      "00:05:00 [DEBUG] train episode 3964: reward = 142.00, steps = 142\n",
      "00:05:00 [DEBUG] train episode 3965: reward = 200.00, steps = 200\n",
      "00:05:00 [DEBUG] train episode 3966: reward = 142.00, steps = 142\n",
      "00:05:00 [DEBUG] train episode 3967: reward = 179.00, steps = 179\n",
      "00:05:01 [DEBUG] train episode 3968: reward = 177.00, steps = 177\n",
      "00:05:01 [DEBUG] train episode 3969: reward = 139.00, steps = 139\n",
      "00:05:01 [DEBUG] train episode 3970: reward = 145.00, steps = 145\n",
      "00:05:01 [DEBUG] train episode 3971: reward = 200.00, steps = 200\n",
      "00:05:01 [DEBUG] train episode 3972: reward = 141.00, steps = 141\n",
      "00:05:01 [DEBUG] train episode 3973: reward = 114.00, steps = 114\n",
      "00:05:01 [DEBUG] train episode 3974: reward = 141.00, steps = 141\n",
      "00:05:01 [DEBUG] train episode 3975: reward = 140.00, steps = 140\n",
      "00:05:01 [DEBUG] train episode 3976: reward = 164.00, steps = 164\n",
      "00:05:01 [DEBUG] train episode 3977: reward = 137.00, steps = 137\n",
      "00:05:01 [DEBUG] train episode 3978: reward = 148.00, steps = 148\n",
      "00:05:01 [DEBUG] train episode 3979: reward = 184.00, steps = 184\n",
      "00:05:01 [DEBUG] train episode 3980: reward = 194.00, steps = 194\n",
      "00:05:01 [DEBUG] train episode 3981: reward = 171.00, steps = 171\n",
      "00:05:01 [DEBUG] train episode 3982: reward = 136.00, steps = 136\n",
      "00:05:01 [DEBUG] train episode 3983: reward = 131.00, steps = 131\n",
      "00:05:01 [DEBUG] train episode 3984: reward = 146.00, steps = 146\n",
      "00:05:01 [DEBUG] train episode 3985: reward = 155.00, steps = 155\n",
      "00:05:01 [DEBUG] train episode 3986: reward = 144.00, steps = 144\n",
      "00:05:01 [DEBUG] train episode 3987: reward = 130.00, steps = 130\n",
      "00:05:02 [DEBUG] train episode 3988: reward = 128.00, steps = 128\n",
      "00:05:02 [DEBUG] train episode 3989: reward = 143.00, steps = 143\n",
      "00:05:02 [DEBUG] train episode 3990: reward = 138.00, steps = 138\n",
      "00:05:02 [DEBUG] train episode 3991: reward = 200.00, steps = 200\n",
      "00:05:02 [DEBUG] train episode 3992: reward = 127.00, steps = 127\n",
      "00:05:02 [DEBUG] train episode 3993: reward = 133.00, steps = 133\n",
      "00:05:02 [DEBUG] train episode 3994: reward = 122.00, steps = 122\n",
      "00:05:02 [DEBUG] train episode 3995: reward = 147.00, steps = 147\n",
      "00:05:02 [DEBUG] train episode 3996: reward = 127.00, steps = 127\n",
      "00:05:02 [DEBUG] train episode 3997: reward = 135.00, steps = 135\n",
      "00:05:02 [DEBUG] train episode 3998: reward = 114.00, steps = 114\n",
      "00:05:02 [DEBUG] train episode 3999: reward = 156.00, steps = 156\n",
      "00:05:02 [DEBUG] train episode 4000: reward = 109.00, steps = 109\n",
      "00:05:02 [DEBUG] train episode 4001: reward = 119.00, steps = 119\n",
      "00:05:02 [DEBUG] train episode 4002: reward = 126.00, steps = 126\n",
      "00:05:02 [DEBUG] train episode 4003: reward = 142.00, steps = 142\n",
      "00:05:02 [DEBUG] train episode 4004: reward = 140.00, steps = 140\n",
      "00:05:02 [DEBUG] train episode 4005: reward = 151.00, steps = 151\n",
      "00:05:02 [DEBUG] train episode 4006: reward = 160.00, steps = 160\n",
      "00:05:02 [DEBUG] train episode 4007: reward = 136.00, steps = 136\n",
      "00:05:02 [DEBUG] train episode 4008: reward = 120.00, steps = 120\n",
      "00:05:02 [DEBUG] train episode 4009: reward = 137.00, steps = 137\n",
      "00:05:02 [DEBUG] train episode 4010: reward = 150.00, steps = 150\n",
      "00:05:03 [DEBUG] train episode 4011: reward = 132.00, steps = 132\n",
      "00:05:03 [DEBUG] train episode 4012: reward = 134.00, steps = 134\n",
      "00:05:03 [DEBUG] train episode 4013: reward = 198.00, steps = 198\n",
      "00:05:03 [DEBUG] train episode 4014: reward = 200.00, steps = 200\n",
      "00:05:03 [DEBUG] train episode 4015: reward = 154.00, steps = 154\n",
      "00:05:03 [DEBUG] train episode 4016: reward = 150.00, steps = 150\n",
      "00:05:03 [DEBUG] train episode 4017: reward = 146.00, steps = 146\n",
      "00:05:03 [DEBUG] train episode 4018: reward = 111.00, steps = 111\n",
      "00:05:03 [DEBUG] train episode 4019: reward = 125.00, steps = 125\n",
      "00:05:03 [DEBUG] train episode 4020: reward = 200.00, steps = 200\n",
      "00:05:03 [DEBUG] train episode 4021: reward = 190.00, steps = 190\n",
      "00:05:03 [DEBUG] train episode 4022: reward = 111.00, steps = 111\n",
      "00:05:03 [DEBUG] train episode 4023: reward = 200.00, steps = 200\n",
      "00:05:03 [DEBUG] train episode 4024: reward = 122.00, steps = 122\n",
      "00:05:03 [DEBUG] train episode 4025: reward = 133.00, steps = 133\n",
      "00:05:03 [DEBUG] train episode 4026: reward = 138.00, steps = 138\n",
      "00:05:03 [DEBUG] train episode 4027: reward = 154.00, steps = 154\n",
      "00:05:03 [DEBUG] train episode 4028: reward = 200.00, steps = 200\n",
      "00:05:03 [DEBUG] train episode 4029: reward = 94.00, steps = 94\n",
      "00:05:03 [DEBUG] train episode 4030: reward = 121.00, steps = 121\n",
      "00:05:04 [DEBUG] train episode 4031: reward = 200.00, steps = 200\n",
      "00:05:04 [DEBUG] train episode 4032: reward = 174.00, steps = 174\n",
      "00:05:04 [DEBUG] train episode 4033: reward = 111.00, steps = 111\n",
      "00:05:04 [DEBUG] train episode 4034: reward = 155.00, steps = 155\n",
      "00:05:04 [DEBUG] train episode 4035: reward = 98.00, steps = 98\n",
      "00:05:04 [DEBUG] train episode 4036: reward = 131.00, steps = 131\n",
      "00:05:04 [DEBUG] train episode 4037: reward = 179.00, steps = 179\n",
      "00:05:04 [DEBUG] train episode 4038: reward = 135.00, steps = 135\n",
      "00:05:04 [DEBUG] train episode 4039: reward = 132.00, steps = 132\n",
      "00:05:04 [DEBUG] train episode 4040: reward = 146.00, steps = 146\n",
      "00:05:04 [DEBUG] train episode 4041: reward = 180.00, steps = 180\n",
      "00:05:04 [DEBUG] train episode 4042: reward = 144.00, steps = 144\n",
      "00:05:04 [DEBUG] train episode 4043: reward = 138.00, steps = 138\n",
      "00:05:04 [DEBUG] train episode 4044: reward = 132.00, steps = 132\n",
      "00:05:04 [DEBUG] train episode 4045: reward = 154.00, steps = 154\n",
      "00:05:04 [DEBUG] train episode 4046: reward = 190.00, steps = 190\n",
      "00:05:04 [DEBUG] train episode 4047: reward = 153.00, steps = 153\n",
      "00:05:04 [DEBUG] train episode 4048: reward = 177.00, steps = 177\n",
      "00:05:04 [DEBUG] train episode 4049: reward = 136.00, steps = 136\n",
      "00:05:04 [DEBUG] train episode 4050: reward = 185.00, steps = 185\n",
      "00:05:04 [DEBUG] train episode 4051: reward = 155.00, steps = 155\n",
      "00:05:05 [DEBUG] train episode 4052: reward = 150.00, steps = 150\n",
      "00:05:05 [DEBUG] train episode 4053: reward = 185.00, steps = 185\n",
      "00:05:05 [DEBUG] train episode 4054: reward = 174.00, steps = 174\n",
      "00:05:05 [DEBUG] train episode 4055: reward = 200.00, steps = 200\n",
      "00:05:05 [DEBUG] train episode 4056: reward = 176.00, steps = 176\n",
      "00:05:05 [DEBUG] train episode 4057: reward = 125.00, steps = 125\n",
      "00:05:05 [DEBUG] train episode 4058: reward = 158.00, steps = 158\n",
      "00:05:05 [DEBUG] train episode 4059: reward = 200.00, steps = 200\n",
      "00:05:05 [DEBUG] train episode 4060: reward = 167.00, steps = 167\n",
      "00:05:05 [DEBUG] train episode 4061: reward = 178.00, steps = 178\n",
      "00:05:05 [DEBUG] train episode 4062: reward = 200.00, steps = 200\n",
      "00:05:05 [DEBUG] train episode 4063: reward = 175.00, steps = 175\n",
      "00:05:05 [DEBUG] train episode 4064: reward = 200.00, steps = 200\n",
      "00:05:05 [DEBUG] train episode 4065: reward = 200.00, steps = 200\n",
      "00:05:05 [DEBUG] train episode 4066: reward = 146.00, steps = 146\n",
      "00:05:05 [DEBUG] train episode 4067: reward = 200.00, steps = 200\n",
      "00:05:05 [DEBUG] train episode 4068: reward = 200.00, steps = 200\n",
      "00:05:06 [DEBUG] train episode 4069: reward = 200.00, steps = 200\n",
      "00:05:06 [DEBUG] train episode 4070: reward = 142.00, steps = 142\n",
      "00:05:06 [DEBUG] train episode 4071: reward = 200.00, steps = 200\n",
      "00:05:06 [DEBUG] train episode 4072: reward = 179.00, steps = 179\n",
      "00:05:06 [DEBUG] train episode 4073: reward = 200.00, steps = 200\n",
      "00:05:06 [DEBUG] train episode 4074: reward = 200.00, steps = 200\n",
      "00:05:06 [DEBUG] train episode 4075: reward = 136.00, steps = 136\n",
      "00:05:06 [DEBUG] train episode 4076: reward = 170.00, steps = 170\n",
      "00:05:06 [DEBUG] train episode 4077: reward = 161.00, steps = 161\n",
      "00:05:06 [DEBUG] train episode 4078: reward = 193.00, steps = 193\n",
      "00:05:06 [DEBUG] train episode 4079: reward = 200.00, steps = 200\n",
      "00:05:06 [DEBUG] train episode 4080: reward = 184.00, steps = 184\n",
      "00:05:06 [DEBUG] train episode 4081: reward = 200.00, steps = 200\n",
      "00:05:06 [DEBUG] train episode 4082: reward = 188.00, steps = 188\n",
      "00:05:06 [DEBUG] train episode 4083: reward = 169.00, steps = 169\n",
      "00:05:06 [DEBUG] train episode 4084: reward = 119.00, steps = 119\n",
      "00:05:06 [DEBUG] train episode 4085: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4086: reward = 188.00, steps = 188\n",
      "00:05:07 [DEBUG] train episode 4087: reward = 197.00, steps = 197\n",
      "00:05:07 [DEBUG] train episode 4088: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4089: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4090: reward = 146.00, steps = 146\n",
      "00:05:07 [DEBUG] train episode 4091: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4092: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4093: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4094: reward = 139.00, steps = 139\n",
      "00:05:07 [DEBUG] train episode 4095: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4096: reward = 182.00, steps = 182\n",
      "00:05:07 [DEBUG] train episode 4097: reward = 180.00, steps = 180\n",
      "00:05:07 [DEBUG] train episode 4098: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4099: reward = 193.00, steps = 193\n",
      "00:05:07 [DEBUG] train episode 4100: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4101: reward = 200.00, steps = 200\n",
      "00:05:07 [DEBUG] train episode 4102: reward = 181.00, steps = 181\n",
      "00:05:08 [DEBUG] train episode 4103: reward = 174.00, steps = 174\n",
      "00:05:08 [DEBUG] train episode 4104: reward = 145.00, steps = 145\n",
      "00:05:08 [DEBUG] train episode 4105: reward = 146.00, steps = 146\n",
      "00:05:08 [DEBUG] train episode 4106: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4107: reward = 174.00, steps = 174\n",
      "00:05:08 [DEBUG] train episode 4108: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4109: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4110: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4111: reward = 190.00, steps = 190\n",
      "00:05:08 [DEBUG] train episode 4112: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4113: reward = 143.00, steps = 143\n",
      "00:05:08 [DEBUG] train episode 4114: reward = 161.00, steps = 161\n",
      "00:05:08 [DEBUG] train episode 4115: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4116: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4117: reward = 146.00, steps = 146\n",
      "00:05:08 [DEBUG] train episode 4118: reward = 169.00, steps = 169\n",
      "00:05:08 [DEBUG] train episode 4119: reward = 178.00, steps = 178\n",
      "00:05:08 [DEBUG] train episode 4120: reward = 200.00, steps = 200\n",
      "00:05:08 [DEBUG] train episode 4121: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4122: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4123: reward = 160.00, steps = 160\n",
      "00:05:09 [DEBUG] train episode 4124: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4125: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4126: reward = 162.00, steps = 162\n",
      "00:05:09 [DEBUG] train episode 4127: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4128: reward = 148.00, steps = 148\n",
      "00:05:09 [DEBUG] train episode 4129: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4130: reward = 196.00, steps = 196\n",
      "00:05:09 [DEBUG] train episode 4131: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4132: reward = 193.00, steps = 193\n",
      "00:05:09 [DEBUG] train episode 4133: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4134: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4135: reward = 200.00, steps = 200\n",
      "00:05:09 [DEBUG] train episode 4136: reward = 183.00, steps = 183\n",
      "00:05:09 [DEBUG] train episode 4137: reward = 88.00, steps = 88\n",
      "00:05:09 [DEBUG] train episode 4138: reward = 155.00, steps = 155\n",
      "00:05:09 [DEBUG] train episode 4139: reward = 191.00, steps = 191\n",
      "00:05:10 [DEBUG] train episode 4140: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4141: reward = 124.00, steps = 124\n",
      "00:05:10 [DEBUG] train episode 4142: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4143: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4144: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4145: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4146: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4147: reward = 171.00, steps = 171\n",
      "00:05:10 [DEBUG] train episode 4148: reward = 177.00, steps = 177\n",
      "00:05:10 [DEBUG] train episode 4149: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4150: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4151: reward = 166.00, steps = 166\n",
      "00:05:10 [DEBUG] train episode 4152: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4153: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4154: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4155: reward = 142.00, steps = 142\n",
      "00:05:10 [DEBUG] train episode 4156: reward = 200.00, steps = 200\n",
      "00:05:10 [DEBUG] train episode 4157: reward = 168.00, steps = 168\n",
      "00:05:11 [DEBUG] train episode 4158: reward = 166.00, steps = 166\n",
      "00:05:11 [DEBUG] train episode 4159: reward = 169.00, steps = 169\n",
      "00:05:11 [DEBUG] train episode 4160: reward = 147.00, steps = 147\n",
      "00:05:11 [DEBUG] train episode 4161: reward = 132.00, steps = 132\n",
      "00:05:11 [DEBUG] train episode 4162: reward = 200.00, steps = 200\n",
      "00:05:11 [DEBUG] train episode 4163: reward = 138.00, steps = 138\n",
      "00:05:11 [DEBUG] train episode 4164: reward = 200.00, steps = 200\n",
      "00:05:11 [DEBUG] train episode 4165: reward = 200.00, steps = 200\n",
      "00:05:11 [DEBUG] train episode 4166: reward = 131.00, steps = 131\n",
      "00:05:11 [DEBUG] train episode 4167: reward = 173.00, steps = 173\n",
      "00:05:11 [DEBUG] train episode 4168: reward = 142.00, steps = 142\n",
      "00:05:11 [DEBUG] train episode 4169: reward = 200.00, steps = 200\n",
      "00:05:11 [DEBUG] train episode 4170: reward = 200.00, steps = 200\n",
      "00:05:11 [DEBUG] train episode 4171: reward = 178.00, steps = 178\n",
      "00:05:11 [DEBUG] train episode 4172: reward = 197.00, steps = 197\n",
      "00:05:11 [DEBUG] train episode 4173: reward = 200.00, steps = 200\n",
      "00:05:11 [DEBUG] train episode 4174: reward = 200.00, steps = 200\n",
      "00:05:11 [DEBUG] train episode 4175: reward = 158.00, steps = 158\n",
      "00:05:11 [DEBUG] train episode 4176: reward = 200.00, steps = 200\n",
      "00:05:12 [DEBUG] train episode 4177: reward = 147.00, steps = 147\n",
      "00:05:12 [DEBUG] train episode 4178: reward = 144.00, steps = 144\n",
      "00:05:12 [DEBUG] train episode 4179: reward = 132.00, steps = 132\n",
      "00:05:12 [DEBUG] train episode 4180: reward = 172.00, steps = 172\n",
      "00:05:12 [DEBUG] train episode 4181: reward = 149.00, steps = 149\n",
      "00:05:12 [DEBUG] train episode 4182: reward = 165.00, steps = 165\n",
      "00:05:12 [DEBUG] train episode 4183: reward = 200.00, steps = 200\n",
      "00:05:12 [DEBUG] train episode 4184: reward = 128.00, steps = 128\n",
      "00:05:12 [DEBUG] train episode 4185: reward = 142.00, steps = 142\n",
      "00:05:12 [DEBUG] train episode 4186: reward = 170.00, steps = 170\n",
      "00:05:12 [DEBUG] train episode 4187: reward = 200.00, steps = 200\n",
      "00:05:12 [DEBUG] train episode 4188: reward = 142.00, steps = 142\n",
      "00:05:12 [DEBUG] train episode 4189: reward = 153.00, steps = 153\n",
      "00:05:12 [DEBUG] train episode 4190: reward = 133.00, steps = 133\n",
      "00:05:12 [DEBUG] train episode 4191: reward = 149.00, steps = 149\n",
      "00:05:12 [DEBUG] train episode 4192: reward = 163.00, steps = 163\n",
      "00:05:12 [DEBUG] train episode 4193: reward = 200.00, steps = 200\n",
      "00:05:12 [DEBUG] train episode 4194: reward = 177.00, steps = 177\n",
      "00:05:12 [DEBUG] train episode 4195: reward = 184.00, steps = 184\n",
      "00:05:13 [DEBUG] train episode 4196: reward = 170.00, steps = 170\n",
      "00:05:13 [DEBUG] train episode 4197: reward = 200.00, steps = 200\n",
      "00:05:13 [DEBUG] train episode 4198: reward = 165.00, steps = 165\n",
      "00:05:13 [DEBUG] train episode 4199: reward = 193.00, steps = 193\n",
      "00:05:13 [DEBUG] train episode 4200: reward = 161.00, steps = 161\n",
      "00:05:13 [DEBUG] train episode 4201: reward = 200.00, steps = 200\n",
      "00:05:13 [DEBUG] train episode 4202: reward = 193.00, steps = 193\n",
      "00:05:13 [DEBUG] train episode 4203: reward = 200.00, steps = 200\n",
      "00:05:13 [DEBUG] train episode 4204: reward = 200.00, steps = 200\n",
      "00:05:13 [DEBUG] train episode 4205: reward = 191.00, steps = 191\n",
      "00:05:13 [DEBUG] train episode 4206: reward = 200.00, steps = 200\n",
      "00:05:13 [DEBUG] train episode 4207: reward = 200.00, steps = 200\n",
      "00:05:13 [DEBUG] train episode 4208: reward = 184.00, steps = 184\n",
      "00:05:13 [DEBUG] train episode 4209: reward = 159.00, steps = 159\n",
      "00:05:13 [DEBUG] train episode 4210: reward = 165.00, steps = 165\n",
      "00:05:13 [DEBUG] train episode 4211: reward = 197.00, steps = 197\n",
      "00:05:13 [DEBUG] train episode 4212: reward = 145.00, steps = 145\n",
      "00:05:13 [DEBUG] train episode 4213: reward = 169.00, steps = 169\n",
      "00:05:13 [DEBUG] train episode 4214: reward = 174.00, steps = 174\n",
      "00:05:14 [DEBUG] train episode 4215: reward = 200.00, steps = 200\n",
      "00:05:14 [DEBUG] train episode 4216: reward = 189.00, steps = 189\n",
      "00:05:14 [DEBUG] train episode 4217: reward = 179.00, steps = 179\n",
      "00:05:14 [DEBUG] train episode 4218: reward = 171.00, steps = 171\n",
      "00:05:14 [DEBUG] train episode 4219: reward = 146.00, steps = 146\n",
      "00:05:14 [DEBUG] train episode 4220: reward = 153.00, steps = 153\n",
      "00:05:14 [DEBUG] train episode 4221: reward = 162.00, steps = 162\n",
      "00:05:14 [DEBUG] train episode 4222: reward = 200.00, steps = 200\n",
      "00:05:14 [DEBUG] train episode 4223: reward = 159.00, steps = 159\n",
      "00:05:14 [DEBUG] train episode 4224: reward = 200.00, steps = 200\n",
      "00:05:14 [DEBUG] train episode 4225: reward = 200.00, steps = 200\n",
      "00:05:14 [DEBUG] train episode 4226: reward = 200.00, steps = 200\n",
      "00:05:14 [DEBUG] train episode 4227: reward = 191.00, steps = 191\n",
      "00:05:14 [DEBUG] train episode 4228: reward = 150.00, steps = 150\n",
      "00:05:14 [DEBUG] train episode 4229: reward = 125.00, steps = 125\n",
      "00:05:14 [DEBUG] train episode 4230: reward = 200.00, steps = 200\n",
      "00:05:14 [DEBUG] train episode 4231: reward = 182.00, steps = 182\n",
      "00:05:14 [DEBUG] train episode 4232: reward = 191.00, steps = 191\n",
      "00:05:15 [DEBUG] train episode 4233: reward = 175.00, steps = 175\n",
      "00:05:15 [DEBUG] train episode 4234: reward = 164.00, steps = 164\n",
      "00:05:15 [DEBUG] train episode 4235: reward = 176.00, steps = 176\n",
      "00:05:15 [DEBUG] train episode 4236: reward = 161.00, steps = 161\n",
      "00:05:15 [DEBUG] train episode 4237: reward = 200.00, steps = 200\n",
      "00:05:15 [DEBUG] train episode 4238: reward = 200.00, steps = 200\n",
      "00:05:15 [DEBUG] train episode 4239: reward = 196.00, steps = 196\n",
      "00:05:15 [DEBUG] train episode 4240: reward = 179.00, steps = 179\n",
      "00:05:15 [DEBUG] train episode 4241: reward = 200.00, steps = 200\n",
      "00:05:15 [DEBUG] train episode 4242: reward = 200.00, steps = 200\n",
      "00:05:15 [DEBUG] train episode 4243: reward = 170.00, steps = 170\n",
      "00:05:15 [DEBUG] train episode 4244: reward = 154.00, steps = 154\n",
      "00:05:15 [DEBUG] train episode 4245: reward = 200.00, steps = 200\n",
      "00:05:15 [DEBUG] train episode 4246: reward = 189.00, steps = 189\n",
      "00:05:15 [DEBUG] train episode 4247: reward = 159.00, steps = 159\n",
      "00:05:15 [DEBUG] train episode 4248: reward = 200.00, steps = 200\n",
      "00:05:15 [DEBUG] train episode 4249: reward = 200.00, steps = 200\n",
      "00:05:15 [DEBUG] train episode 4250: reward = 137.00, steps = 137\n",
      "00:05:15 [DEBUG] train episode 4251: reward = 140.00, steps = 140\n",
      "00:05:16 [DEBUG] train episode 4252: reward = 168.00, steps = 168\n",
      "00:05:16 [DEBUG] train episode 4253: reward = 192.00, steps = 192\n",
      "00:05:16 [DEBUG] train episode 4254: reward = 200.00, steps = 200\n",
      "00:05:16 [DEBUG] train episode 4255: reward = 200.00, steps = 200\n",
      "00:05:16 [DEBUG] train episode 4256: reward = 165.00, steps = 165\n",
      "00:05:16 [DEBUG] train episode 4257: reward = 176.00, steps = 176\n",
      "00:05:16 [DEBUG] train episode 4258: reward = 200.00, steps = 200\n",
      "00:05:16 [DEBUG] train episode 4259: reward = 148.00, steps = 148\n",
      "00:05:16 [DEBUG] train episode 4260: reward = 163.00, steps = 163\n",
      "00:05:16 [DEBUG] train episode 4261: reward = 148.00, steps = 148\n",
      "00:05:16 [DEBUG] train episode 4262: reward = 199.00, steps = 199\n",
      "00:05:16 [DEBUG] train episode 4263: reward = 135.00, steps = 135\n",
      "00:05:16 [DEBUG] train episode 4264: reward = 165.00, steps = 165\n",
      "00:05:16 [DEBUG] train episode 4265: reward = 127.00, steps = 127\n",
      "00:05:16 [DEBUG] train episode 4266: reward = 197.00, steps = 197\n",
      "00:05:16 [DEBUG] train episode 4267: reward = 127.00, steps = 127\n",
      "00:05:16 [DEBUG] train episode 4268: reward = 181.00, steps = 181\n",
      "00:05:16 [DEBUG] train episode 4269: reward = 178.00, steps = 178\n",
      "00:05:16 [DEBUG] train episode 4270: reward = 141.00, steps = 141\n",
      "00:05:17 [DEBUG] train episode 4271: reward = 200.00, steps = 200\n",
      "00:05:17 [DEBUG] train episode 4272: reward = 157.00, steps = 157\n",
      "00:05:17 [DEBUG] train episode 4273: reward = 135.00, steps = 135\n",
      "00:05:17 [DEBUG] train episode 4274: reward = 119.00, steps = 119\n",
      "00:05:17 [DEBUG] train episode 4275: reward = 200.00, steps = 200\n",
      "00:05:17 [DEBUG] train episode 4276: reward = 144.00, steps = 144\n",
      "00:05:17 [DEBUG] train episode 4277: reward = 164.00, steps = 164\n",
      "00:05:17 [DEBUG] train episode 4278: reward = 143.00, steps = 143\n",
      "00:05:17 [DEBUG] train episode 4279: reward = 200.00, steps = 200\n",
      "00:05:17 [DEBUG] train episode 4280: reward = 161.00, steps = 161\n",
      "00:05:17 [DEBUG] train episode 4281: reward = 171.00, steps = 171\n",
      "00:05:17 [DEBUG] train episode 4282: reward = 200.00, steps = 200\n",
      "00:05:17 [DEBUG] train episode 4283: reward = 200.00, steps = 200\n",
      "00:05:17 [DEBUG] train episode 4284: reward = 162.00, steps = 162\n",
      "00:05:17 [DEBUG] train episode 4285: reward = 190.00, steps = 190\n",
      "00:05:17 [DEBUG] train episode 4286: reward = 154.00, steps = 154\n",
      "00:05:17 [DEBUG] train episode 4287: reward = 149.00, steps = 149\n",
      "00:05:17 [DEBUG] train episode 4288: reward = 200.00, steps = 200\n",
      "00:05:17 [DEBUG] train episode 4289: reward = 176.00, steps = 176\n",
      "00:05:17 [DEBUG] train episode 4290: reward = 163.00, steps = 163\n",
      "00:05:17 [DEBUG] train episode 4291: reward = 136.00, steps = 136\n",
      "00:05:18 [DEBUG] train episode 4292: reward = 164.00, steps = 164\n",
      "00:05:18 [DEBUG] train episode 4293: reward = 200.00, steps = 200\n",
      "00:05:18 [DEBUG] train episode 4294: reward = 200.00, steps = 200\n",
      "00:05:18 [DEBUG] train episode 4295: reward = 169.00, steps = 169\n",
      "00:05:18 [DEBUG] train episode 4296: reward = 166.00, steps = 166\n",
      "00:05:18 [DEBUG] train episode 4297: reward = 147.00, steps = 147\n",
      "00:05:18 [DEBUG] train episode 4298: reward = 200.00, steps = 200\n",
      "00:05:18 [DEBUG] train episode 4299: reward = 141.00, steps = 141\n",
      "00:05:18 [DEBUG] train episode 4300: reward = 117.00, steps = 117\n",
      "00:05:18 [DEBUG] train episode 4301: reward = 200.00, steps = 200\n",
      "00:05:18 [DEBUG] train episode 4302: reward = 200.00, steps = 200\n",
      "00:05:18 [DEBUG] train episode 4303: reward = 169.00, steps = 169\n",
      "00:05:18 [DEBUG] train episode 4304: reward = 155.00, steps = 155\n",
      "00:05:18 [DEBUG] train episode 4305: reward = 144.00, steps = 144\n",
      "00:05:18 [DEBUG] train episode 4306: reward = 200.00, steps = 200\n",
      "00:05:18 [DEBUG] train episode 4307: reward = 161.00, steps = 161\n",
      "00:05:18 [DEBUG] train episode 4308: reward = 114.00, steps = 114\n",
      "00:05:18 [DEBUG] train episode 4309: reward = 170.00, steps = 170\n",
      "00:05:18 [DEBUG] train episode 4310: reward = 181.00, steps = 181\n",
      "00:05:19 [DEBUG] train episode 4311: reward = 191.00, steps = 191\n",
      "00:05:19 [DEBUG] train episode 4312: reward = 159.00, steps = 159\n",
      "00:05:19 [DEBUG] train episode 4313: reward = 132.00, steps = 132\n",
      "00:05:19 [DEBUG] train episode 4314: reward = 175.00, steps = 175\n",
      "00:05:19 [DEBUG] train episode 4315: reward = 131.00, steps = 131\n",
      "00:05:19 [DEBUG] train episode 4316: reward = 154.00, steps = 154\n",
      "00:05:19 [DEBUG] train episode 4317: reward = 145.00, steps = 145\n",
      "00:05:19 [DEBUG] train episode 4318: reward = 184.00, steps = 184\n",
      "00:05:19 [DEBUG] train episode 4319: reward = 158.00, steps = 158\n",
      "00:05:19 [DEBUG] train episode 4320: reward = 123.00, steps = 123\n",
      "00:05:19 [DEBUG] train episode 4321: reward = 162.00, steps = 162\n",
      "00:05:19 [DEBUG] train episode 4322: reward = 138.00, steps = 138\n",
      "00:05:19 [DEBUG] train episode 4323: reward = 135.00, steps = 135\n",
      "00:05:19 [DEBUG] train episode 4324: reward = 119.00, steps = 119\n",
      "00:05:19 [DEBUG] train episode 4325: reward = 149.00, steps = 149\n",
      "00:05:19 [DEBUG] train episode 4326: reward = 122.00, steps = 122\n",
      "00:05:19 [DEBUG] train episode 4327: reward = 200.00, steps = 200\n",
      "00:05:19 [DEBUG] train episode 4328: reward = 200.00, steps = 200\n",
      "00:05:19 [DEBUG] train episode 4329: reward = 195.00, steps = 195\n",
      "00:05:19 [DEBUG] train episode 4330: reward = 166.00, steps = 166\n",
      "00:05:19 [DEBUG] train episode 4331: reward = 200.00, steps = 200\n",
      "00:05:19 [DEBUG] train episode 4332: reward = 169.00, steps = 169\n",
      "00:05:20 [DEBUG] train episode 4333: reward = 137.00, steps = 137\n",
      "00:05:20 [DEBUG] train episode 4334: reward = 161.00, steps = 161\n",
      "00:05:20 [DEBUG] train episode 4335: reward = 117.00, steps = 117\n",
      "00:05:20 [DEBUG] train episode 4336: reward = 200.00, steps = 200\n",
      "00:05:20 [DEBUG] train episode 4337: reward = 126.00, steps = 126\n",
      "00:05:20 [DEBUG] train episode 4338: reward = 151.00, steps = 151\n",
      "00:05:20 [DEBUG] train episode 4339: reward = 200.00, steps = 200\n",
      "00:05:20 [DEBUG] train episode 4340: reward = 163.00, steps = 163\n",
      "00:05:20 [DEBUG] train episode 4341: reward = 170.00, steps = 170\n",
      "00:05:20 [DEBUG] train episode 4342: reward = 141.00, steps = 141\n",
      "00:05:20 [DEBUG] train episode 4343: reward = 164.00, steps = 164\n",
      "00:05:20 [DEBUG] train episode 4344: reward = 200.00, steps = 200\n",
      "00:05:20 [DEBUG] train episode 4345: reward = 195.00, steps = 195\n",
      "00:05:20 [DEBUG] train episode 4346: reward = 149.00, steps = 149\n",
      "00:05:20 [DEBUG] train episode 4347: reward = 152.00, steps = 152\n",
      "00:05:20 [DEBUG] train episode 4348: reward = 135.00, steps = 135\n",
      "00:05:20 [DEBUG] train episode 4349: reward = 169.00, steps = 169\n",
      "00:05:20 [DEBUG] train episode 4350: reward = 135.00, steps = 135\n",
      "00:05:20 [DEBUG] train episode 4351: reward = 170.00, steps = 170\n",
      "00:05:20 [DEBUG] train episode 4352: reward = 120.00, steps = 120\n",
      "00:05:20 [DEBUG] train episode 4353: reward = 156.00, steps = 156\n",
      "00:05:21 [DEBUG] train episode 4354: reward = 200.00, steps = 200\n",
      "00:05:21 [DEBUG] train episode 4355: reward = 124.00, steps = 124\n",
      "00:05:21 [DEBUG] train episode 4356: reward = 160.00, steps = 160\n",
      "00:05:21 [DEBUG] train episode 4357: reward = 178.00, steps = 178\n",
      "00:05:21 [DEBUG] train episode 4358: reward = 157.00, steps = 157\n",
      "00:05:21 [DEBUG] train episode 4359: reward = 200.00, steps = 200\n",
      "00:05:21 [DEBUG] train episode 4360: reward = 200.00, steps = 200\n",
      "00:05:21 [DEBUG] train episode 4361: reward = 161.00, steps = 161\n",
      "00:05:21 [DEBUG] train episode 4362: reward = 147.00, steps = 147\n",
      "00:05:21 [DEBUG] train episode 4363: reward = 200.00, steps = 200\n",
      "00:05:21 [DEBUG] train episode 4364: reward = 166.00, steps = 166\n",
      "00:05:21 [DEBUG] train episode 4365: reward = 186.00, steps = 186\n",
      "00:05:21 [DEBUG] train episode 4366: reward = 189.00, steps = 189\n",
      "00:05:21 [DEBUG] train episode 4367: reward = 160.00, steps = 160\n",
      "00:05:21 [DEBUG] train episode 4368: reward = 129.00, steps = 129\n",
      "00:05:21 [DEBUG] train episode 4369: reward = 137.00, steps = 137\n",
      "00:05:21 [DEBUG] train episode 4370: reward = 200.00, steps = 200\n",
      "00:05:21 [DEBUG] train episode 4371: reward = 132.00, steps = 132\n",
      "00:05:21 [DEBUG] train episode 4372: reward = 167.00, steps = 167\n",
      "00:05:21 [DEBUG] train episode 4373: reward = 142.00, steps = 142\n",
      "00:05:22 [DEBUG] train episode 4374: reward = 194.00, steps = 194\n",
      "00:05:22 [DEBUG] train episode 4375: reward = 141.00, steps = 141\n",
      "00:05:22 [DEBUG] train episode 4376: reward = 200.00, steps = 200\n",
      "00:05:22 [DEBUG] train episode 4377: reward = 165.00, steps = 165\n",
      "00:05:22 [DEBUG] train episode 4378: reward = 155.00, steps = 155\n",
      "00:05:22 [DEBUG] train episode 4379: reward = 200.00, steps = 200\n",
      "00:05:22 [DEBUG] train episode 4380: reward = 171.00, steps = 171\n",
      "00:05:22 [DEBUG] train episode 4381: reward = 145.00, steps = 145\n",
      "00:05:22 [DEBUG] train episode 4382: reward = 170.00, steps = 170\n",
      "00:05:22 [DEBUG] train episode 4383: reward = 190.00, steps = 190\n",
      "00:05:22 [DEBUG] train episode 4384: reward = 116.00, steps = 116\n",
      "00:05:22 [DEBUG] train episode 4385: reward = 175.00, steps = 175\n",
      "00:05:22 [DEBUG] train episode 4386: reward = 200.00, steps = 200\n",
      "00:05:22 [DEBUG] train episode 4387: reward = 149.00, steps = 149\n",
      "00:05:22 [DEBUG] train episode 4388: reward = 139.00, steps = 139\n",
      "00:05:22 [DEBUG] train episode 4389: reward = 123.00, steps = 123\n",
      "00:05:22 [DEBUG] train episode 4390: reward = 191.00, steps = 191\n",
      "00:05:22 [DEBUG] train episode 4391: reward = 152.00, steps = 152\n",
      "00:05:22 [DEBUG] train episode 4392: reward = 169.00, steps = 169\n",
      "00:05:22 [DEBUG] train episode 4393: reward = 200.00, steps = 200\n",
      "00:05:23 [DEBUG] train episode 4394: reward = 130.00, steps = 130\n",
      "00:05:23 [DEBUG] train episode 4395: reward = 120.00, steps = 120\n",
      "00:05:23 [DEBUG] train episode 4396: reward = 163.00, steps = 163\n",
      "00:05:23 [DEBUG] train episode 4397: reward = 129.00, steps = 129\n",
      "00:05:23 [DEBUG] train episode 4398: reward = 187.00, steps = 187\n",
      "00:05:23 [DEBUG] train episode 4399: reward = 200.00, steps = 200\n",
      "00:05:23 [DEBUG] train episode 4400: reward = 120.00, steps = 120\n",
      "00:05:23 [DEBUG] train episode 4401: reward = 133.00, steps = 133\n",
      "00:05:23 [DEBUG] train episode 4402: reward = 200.00, steps = 200\n",
      "00:05:23 [DEBUG] train episode 4403: reward = 200.00, steps = 200\n",
      "00:05:23 [DEBUG] train episode 4404: reward = 200.00, steps = 200\n",
      "00:05:23 [DEBUG] train episode 4405: reward = 154.00, steps = 154\n",
      "00:05:23 [DEBUG] train episode 4406: reward = 126.00, steps = 126\n",
      "00:05:23 [DEBUG] train episode 4407: reward = 117.00, steps = 117\n",
      "00:05:23 [DEBUG] train episode 4408: reward = 139.00, steps = 139\n",
      "00:05:23 [DEBUG] train episode 4409: reward = 146.00, steps = 146\n",
      "00:05:23 [DEBUG] train episode 4410: reward = 150.00, steps = 150\n",
      "00:05:23 [DEBUG] train episode 4411: reward = 132.00, steps = 132\n",
      "00:05:23 [DEBUG] train episode 4412: reward = 133.00, steps = 133\n",
      "00:05:23 [DEBUG] train episode 4413: reward = 149.00, steps = 149\n",
      "00:05:23 [DEBUG] train episode 4414: reward = 179.00, steps = 179\n",
      "00:05:24 [DEBUG] train episode 4415: reward = 106.00, steps = 106\n",
      "00:05:24 [DEBUG] train episode 4416: reward = 165.00, steps = 165\n",
      "00:05:24 [DEBUG] train episode 4417: reward = 161.00, steps = 161\n",
      "00:05:24 [DEBUG] train episode 4418: reward = 139.00, steps = 139\n",
      "00:05:24 [DEBUG] train episode 4419: reward = 136.00, steps = 136\n",
      "00:05:24 [DEBUG] train episode 4420: reward = 160.00, steps = 160\n",
      "00:05:24 [DEBUG] train episode 4421: reward = 120.00, steps = 120\n",
      "00:05:24 [DEBUG] train episode 4422: reward = 200.00, steps = 200\n",
      "00:05:24 [DEBUG] train episode 4423: reward = 200.00, steps = 200\n",
      "00:05:24 [DEBUG] train episode 4424: reward = 179.00, steps = 179\n",
      "00:05:24 [DEBUG] train episode 4425: reward = 139.00, steps = 139\n",
      "00:05:24 [DEBUG] train episode 4426: reward = 126.00, steps = 126\n",
      "00:05:24 [DEBUG] train episode 4427: reward = 126.00, steps = 126\n",
      "00:05:24 [DEBUG] train episode 4428: reward = 200.00, steps = 200\n",
      "00:05:24 [DEBUG] train episode 4429: reward = 200.00, steps = 200\n",
      "00:05:24 [DEBUG] train episode 4430: reward = 164.00, steps = 164\n",
      "00:05:24 [DEBUG] train episode 4431: reward = 156.00, steps = 156\n",
      "00:05:24 [DEBUG] train episode 4432: reward = 128.00, steps = 128\n",
      "00:05:24 [DEBUG] train episode 4433: reward = 200.00, steps = 200\n",
      "00:05:24 [DEBUG] train episode 4434: reward = 127.00, steps = 127\n",
      "00:05:25 [DEBUG] train episode 4435: reward = 200.00, steps = 200\n",
      "00:05:25 [DEBUG] train episode 4436: reward = 190.00, steps = 190\n",
      "00:05:25 [DEBUG] train episode 4437: reward = 105.00, steps = 105\n",
      "00:05:25 [DEBUG] train episode 4438: reward = 200.00, steps = 200\n",
      "00:05:25 [DEBUG] train episode 4439: reward = 154.00, steps = 154\n",
      "00:05:25 [DEBUG] train episode 4440: reward = 142.00, steps = 142\n",
      "00:05:25 [DEBUG] train episode 4441: reward = 175.00, steps = 175\n",
      "00:05:25 [DEBUG] train episode 4442: reward = 167.00, steps = 167\n",
      "00:05:25 [DEBUG] train episode 4443: reward = 161.00, steps = 161\n",
      "00:05:25 [DEBUG] train episode 4444: reward = 109.00, steps = 109\n",
      "00:05:25 [DEBUG] train episode 4445: reward = 144.00, steps = 144\n",
      "00:05:25 [DEBUG] train episode 4446: reward = 108.00, steps = 108\n",
      "00:05:25 [DEBUG] train episode 4447: reward = 125.00, steps = 125\n",
      "00:05:25 [DEBUG] train episode 4448: reward = 149.00, steps = 149\n",
      "00:05:25 [DEBUG] train episode 4449: reward = 159.00, steps = 159\n",
      "00:05:25 [DEBUG] train episode 4450: reward = 183.00, steps = 183\n",
      "00:05:25 [DEBUG] train episode 4451: reward = 200.00, steps = 200\n",
      "00:05:25 [DEBUG] train episode 4452: reward = 187.00, steps = 187\n",
      "00:05:26 [DEBUG] train episode 4453: reward = 191.00, steps = 191\n",
      "00:05:26 [DEBUG] train episode 4454: reward = 160.00, steps = 160\n",
      "00:05:26 [DEBUG] train episode 4455: reward = 200.00, steps = 200\n",
      "00:05:26 [DEBUG] train episode 4456: reward = 200.00, steps = 200\n",
      "00:05:26 [DEBUG] train episode 4457: reward = 92.00, steps = 92\n",
      "00:05:26 [DEBUG] train episode 4458: reward = 148.00, steps = 148\n",
      "00:05:26 [DEBUG] train episode 4459: reward = 139.00, steps = 139\n",
      "00:05:26 [DEBUG] train episode 4460: reward = 155.00, steps = 155\n",
      "00:05:26 [DEBUG] train episode 4461: reward = 164.00, steps = 164\n",
      "00:05:26 [DEBUG] train episode 4462: reward = 198.00, steps = 198\n",
      "00:05:26 [DEBUG] train episode 4463: reward = 128.00, steps = 128\n",
      "00:05:26 [DEBUG] train episode 4464: reward = 200.00, steps = 200\n",
      "00:05:26 [DEBUG] train episode 4465: reward = 190.00, steps = 190\n",
      "00:05:26 [DEBUG] train episode 4466: reward = 159.00, steps = 159\n",
      "00:05:26 [DEBUG] train episode 4467: reward = 124.00, steps = 124\n",
      "00:05:26 [DEBUG] train episode 4468: reward = 150.00, steps = 150\n",
      "00:05:26 [DEBUG] train episode 4469: reward = 185.00, steps = 185\n",
      "00:05:26 [DEBUG] train episode 4470: reward = 200.00, steps = 200\n",
      "00:05:26 [DEBUG] train episode 4471: reward = 171.00, steps = 171\n",
      "00:05:26 [DEBUG] train episode 4472: reward = 200.00, steps = 200\n",
      "00:05:27 [DEBUG] train episode 4473: reward = 150.00, steps = 150\n",
      "00:05:27 [DEBUG] train episode 4474: reward = 200.00, steps = 200\n",
      "00:05:27 [DEBUG] train episode 4475: reward = 195.00, steps = 195\n",
      "00:05:27 [DEBUG] train episode 4476: reward = 148.00, steps = 148\n",
      "00:05:27 [DEBUG] train episode 4477: reward = 116.00, steps = 116\n",
      "00:05:27 [DEBUG] train episode 4478: reward = 132.00, steps = 132\n",
      "00:05:27 [DEBUG] train episode 4479: reward = 164.00, steps = 164\n",
      "00:05:27 [DEBUG] train episode 4480: reward = 156.00, steps = 156\n",
      "00:05:27 [DEBUG] train episode 4481: reward = 116.00, steps = 116\n",
      "00:05:27 [DEBUG] train episode 4482: reward = 200.00, steps = 200\n",
      "00:05:27 [DEBUG] train episode 4483: reward = 149.00, steps = 149\n",
      "00:05:27 [DEBUG] train episode 4484: reward = 98.00, steps = 98\n",
      "00:05:27 [DEBUG] train episode 4485: reward = 113.00, steps = 113\n",
      "00:05:27 [DEBUG] train episode 4486: reward = 125.00, steps = 125\n",
      "00:05:27 [DEBUG] train episode 4487: reward = 143.00, steps = 143\n",
      "00:05:27 [DEBUG] train episode 4488: reward = 104.00, steps = 104\n",
      "00:05:27 [DEBUG] train episode 4489: reward = 123.00, steps = 123\n",
      "00:05:27 [DEBUG] train episode 4490: reward = 166.00, steps = 166\n",
      "00:05:27 [DEBUG] train episode 4491: reward = 124.00, steps = 124\n",
      "00:05:27 [DEBUG] train episode 4492: reward = 149.00, steps = 149\n",
      "00:05:27 [DEBUG] train episode 4493: reward = 109.00, steps = 109\n",
      "00:05:28 [DEBUG] train episode 4494: reward = 189.00, steps = 189\n",
      "00:05:28 [DEBUG] train episode 4495: reward = 132.00, steps = 132\n",
      "00:05:28 [DEBUG] train episode 4496: reward = 175.00, steps = 175\n",
      "00:05:28 [DEBUG] train episode 4497: reward = 118.00, steps = 118\n",
      "00:05:28 [DEBUG] train episode 4498: reward = 200.00, steps = 200\n",
      "00:05:28 [DEBUG] train episode 4499: reward = 200.00, steps = 200\n",
      "00:05:28 [DEBUG] train episode 4500: reward = 150.00, steps = 150\n",
      "00:05:28 [DEBUG] train episode 4501: reward = 120.00, steps = 120\n",
      "00:05:28 [DEBUG] train episode 4502: reward = 158.00, steps = 158\n",
      "00:05:28 [DEBUG] train episode 4503: reward = 167.00, steps = 167\n",
      "00:05:28 [DEBUG] train episode 4504: reward = 140.00, steps = 140\n",
      "00:05:28 [DEBUG] train episode 4505: reward = 122.00, steps = 122\n",
      "00:05:28 [DEBUG] train episode 4506: reward = 107.00, steps = 107\n",
      "00:05:28 [DEBUG] train episode 4507: reward = 166.00, steps = 166\n",
      "00:05:28 [DEBUG] train episode 4508: reward = 127.00, steps = 127\n",
      "00:05:28 [DEBUG] train episode 4509: reward = 153.00, steps = 153\n",
      "00:05:28 [DEBUG] train episode 4510: reward = 145.00, steps = 145\n",
      "00:05:28 [DEBUG] train episode 4511: reward = 143.00, steps = 143\n",
      "00:05:28 [DEBUG] train episode 4512: reward = 153.00, steps = 153\n",
      "00:05:29 [DEBUG] train episode 4513: reward = 200.00, steps = 200\n",
      "00:05:29 [DEBUG] train episode 4514: reward = 159.00, steps = 159\n",
      "00:05:29 [DEBUG] train episode 4515: reward = 133.00, steps = 133\n",
      "00:05:29 [DEBUG] train episode 4516: reward = 124.00, steps = 124\n",
      "00:05:29 [DEBUG] train episode 4517: reward = 166.00, steps = 166\n",
      "00:05:29 [DEBUG] train episode 4518: reward = 136.00, steps = 136\n",
      "00:05:29 [DEBUG] train episode 4519: reward = 200.00, steps = 200\n",
      "00:05:29 [DEBUG] train episode 4520: reward = 160.00, steps = 160\n",
      "00:05:29 [DEBUG] train episode 4521: reward = 124.00, steps = 124\n",
      "00:05:29 [DEBUG] train episode 4522: reward = 200.00, steps = 200\n",
      "00:05:29 [DEBUG] train episode 4523: reward = 131.00, steps = 131\n",
      "00:05:29 [DEBUG] train episode 4524: reward = 145.00, steps = 145\n",
      "00:05:29 [DEBUG] train episode 4525: reward = 182.00, steps = 182\n",
      "00:05:29 [DEBUG] train episode 4526: reward = 177.00, steps = 177\n",
      "00:05:29 [DEBUG] train episode 4527: reward = 175.00, steps = 175\n",
      "00:05:29 [DEBUG] train episode 4528: reward = 146.00, steps = 146\n",
      "00:05:29 [DEBUG] train episode 4529: reward = 122.00, steps = 122\n",
      "00:05:29 [DEBUG] train episode 4530: reward = 164.00, steps = 164\n",
      "00:05:29 [DEBUG] train episode 4531: reward = 142.00, steps = 142\n",
      "00:05:30 [DEBUG] train episode 4532: reward = 150.00, steps = 150\n",
      "00:05:30 [DEBUG] train episode 4533: reward = 141.00, steps = 141\n",
      "00:05:30 [DEBUG] train episode 4534: reward = 179.00, steps = 179\n",
      "00:05:30 [DEBUG] train episode 4535: reward = 130.00, steps = 130\n",
      "00:05:30 [DEBUG] train episode 4536: reward = 188.00, steps = 188\n",
      "00:05:30 [DEBUG] train episode 4537: reward = 121.00, steps = 121\n",
      "00:05:30 [DEBUG] train episode 4538: reward = 158.00, steps = 158\n",
      "00:05:30 [DEBUG] train episode 4539: reward = 133.00, steps = 133\n",
      "00:05:30 [DEBUG] train episode 4540: reward = 133.00, steps = 133\n",
      "00:05:30 [DEBUG] train episode 4541: reward = 163.00, steps = 163\n",
      "00:05:30 [DEBUG] train episode 4542: reward = 117.00, steps = 117\n",
      "00:05:30 [DEBUG] train episode 4543: reward = 200.00, steps = 200\n",
      "00:05:30 [DEBUG] train episode 4544: reward = 114.00, steps = 114\n",
      "00:05:30 [DEBUG] train episode 4545: reward = 200.00, steps = 200\n",
      "00:05:30 [DEBUG] train episode 4546: reward = 200.00, steps = 200\n",
      "00:05:30 [DEBUG] train episode 4547: reward = 200.00, steps = 200\n",
      "00:05:30 [DEBUG] train episode 4548: reward = 143.00, steps = 143\n",
      "00:05:30 [DEBUG] train episode 4549: reward = 128.00, steps = 128\n",
      "00:05:30 [DEBUG] train episode 4550: reward = 137.00, steps = 137\n",
      "00:05:30 [DEBUG] train episode 4551: reward = 102.00, steps = 102\n",
      "00:05:31 [DEBUG] train episode 4552: reward = 180.00, steps = 180\n",
      "00:05:31 [DEBUG] train episode 4553: reward = 148.00, steps = 148\n",
      "00:05:31 [DEBUG] train episode 4554: reward = 192.00, steps = 192\n",
      "00:05:31 [DEBUG] train episode 4555: reward = 116.00, steps = 116\n",
      "00:05:31 [DEBUG] train episode 4556: reward = 126.00, steps = 126\n",
      "00:05:31 [DEBUG] train episode 4557: reward = 124.00, steps = 124\n",
      "00:05:31 [DEBUG] train episode 4558: reward = 144.00, steps = 144\n",
      "00:05:31 [DEBUG] train episode 4559: reward = 153.00, steps = 153\n",
      "00:05:31 [DEBUG] train episode 4560: reward = 125.00, steps = 125\n",
      "00:05:31 [DEBUG] train episode 4561: reward = 200.00, steps = 200\n",
      "00:05:31 [DEBUG] train episode 4562: reward = 103.00, steps = 103\n",
      "00:05:31 [DEBUG] train episode 4563: reward = 128.00, steps = 128\n",
      "00:05:31 [DEBUG] train episode 4564: reward = 113.00, steps = 113\n",
      "00:05:31 [DEBUG] train episode 4565: reward = 117.00, steps = 117\n",
      "00:05:31 [DEBUG] train episode 4566: reward = 130.00, steps = 130\n",
      "00:05:31 [DEBUG] train episode 4567: reward = 131.00, steps = 131\n",
      "00:05:31 [DEBUG] train episode 4568: reward = 159.00, steps = 159\n",
      "00:05:31 [DEBUG] train episode 4569: reward = 144.00, steps = 144\n",
      "00:05:31 [DEBUG] train episode 4570: reward = 194.00, steps = 194\n",
      "00:05:31 [DEBUG] train episode 4571: reward = 200.00, steps = 200\n",
      "00:05:31 [DEBUG] train episode 4572: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4573: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4574: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4575: reward = 175.00, steps = 175\n",
      "00:05:32 [DEBUG] train episode 4576: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4577: reward = 132.00, steps = 132\n",
      "00:05:32 [DEBUG] train episode 4578: reward = 139.00, steps = 139\n",
      "00:05:32 [DEBUG] train episode 4579: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4580: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4581: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4582: reward = 200.00, steps = 200\n",
      "00:05:32 [DEBUG] train episode 4583: reward = 172.00, steps = 172\n",
      "00:05:32 [DEBUG] train episode 4584: reward = 138.00, steps = 138\n",
      "00:05:32 [DEBUG] train episode 4585: reward = 119.00, steps = 119\n",
      "00:05:32 [DEBUG] train episode 4586: reward = 183.00, steps = 183\n",
      "00:05:32 [DEBUG] train episode 4587: reward = 133.00, steps = 133\n",
      "00:05:32 [DEBUG] train episode 4588: reward = 160.00, steps = 160\n",
      "00:05:32 [DEBUG] train episode 4589: reward = 134.00, steps = 134\n",
      "00:05:32 [DEBUG] train episode 4590: reward = 175.00, steps = 175\n",
      "00:05:32 [DEBUG] train episode 4591: reward = 200.00, steps = 200\n",
      "00:05:33 [DEBUG] train episode 4592: reward = 190.00, steps = 190\n",
      "00:05:33 [DEBUG] train episode 4593: reward = 191.00, steps = 191\n",
      "00:05:33 [DEBUG] train episode 4594: reward = 146.00, steps = 146\n",
      "00:05:33 [DEBUG] train episode 4595: reward = 200.00, steps = 200\n",
      "00:05:33 [DEBUG] train episode 4596: reward = 200.00, steps = 200\n",
      "00:05:33 [DEBUG] train episode 4597: reward = 197.00, steps = 197\n",
      "00:05:33 [DEBUG] train episode 4598: reward = 182.00, steps = 182\n",
      "00:05:33 [DEBUG] train episode 4599: reward = 153.00, steps = 153\n",
      "00:05:33 [DEBUG] train episode 4600: reward = 153.00, steps = 153\n",
      "00:05:33 [DEBUG] train episode 4601: reward = 200.00, steps = 200\n",
      "00:05:33 [DEBUG] train episode 4602: reward = 114.00, steps = 114\n",
      "00:05:33 [DEBUG] train episode 4603: reward = 148.00, steps = 148\n",
      "00:05:33 [DEBUG] train episode 4604: reward = 191.00, steps = 191\n",
      "00:05:33 [DEBUG] train episode 4605: reward = 162.00, steps = 162\n",
      "00:05:33 [DEBUG] train episode 4606: reward = 200.00, steps = 200\n",
      "00:05:33 [DEBUG] train episode 4607: reward = 169.00, steps = 169\n",
      "00:05:33 [DEBUG] train episode 4608: reward = 147.00, steps = 147\n",
      "00:05:33 [DEBUG] train episode 4609: reward = 164.00, steps = 164\n",
      "00:05:33 [DEBUG] train episode 4610: reward = 143.00, steps = 143\n",
      "00:05:34 [DEBUG] train episode 4611: reward = 200.00, steps = 200\n",
      "00:05:34 [DEBUG] train episode 4612: reward = 155.00, steps = 155\n",
      "00:05:34 [DEBUG] train episode 4613: reward = 123.00, steps = 123\n",
      "00:05:34 [DEBUG] train episode 4614: reward = 163.00, steps = 163\n",
      "00:05:34 [DEBUG] train episode 4615: reward = 130.00, steps = 130\n",
      "00:05:34 [DEBUG] train episode 4616: reward = 141.00, steps = 141\n",
      "00:05:34 [DEBUG] train episode 4617: reward = 187.00, steps = 187\n",
      "00:05:34 [DEBUG] train episode 4618: reward = 193.00, steps = 193\n",
      "00:05:34 [DEBUG] train episode 4619: reward = 167.00, steps = 167\n",
      "00:05:34 [DEBUG] train episode 4620: reward = 115.00, steps = 115\n",
      "00:05:34 [DEBUG] train episode 4621: reward = 136.00, steps = 136\n",
      "00:05:34 [DEBUG] train episode 4622: reward = 136.00, steps = 136\n",
      "00:05:34 [DEBUG] train episode 4623: reward = 183.00, steps = 183\n",
      "00:05:34 [DEBUG] train episode 4624: reward = 192.00, steps = 192\n",
      "00:05:34 [DEBUG] train episode 4625: reward = 137.00, steps = 137\n",
      "00:05:34 [DEBUG] train episode 4626: reward = 170.00, steps = 170\n",
      "00:05:34 [DEBUG] train episode 4627: reward = 200.00, steps = 200\n",
      "00:05:34 [DEBUG] train episode 4628: reward = 149.00, steps = 149\n",
      "00:05:34 [DEBUG] train episode 4629: reward = 180.00, steps = 180\n",
      "00:05:34 [DEBUG] train episode 4630: reward = 154.00, steps = 154\n",
      "00:05:35 [DEBUG] train episode 4631: reward = 200.00, steps = 200\n",
      "00:05:35 [DEBUG] train episode 4632: reward = 122.00, steps = 122\n",
      "00:05:35 [DEBUG] train episode 4633: reward = 117.00, steps = 117\n",
      "00:05:35 [DEBUG] train episode 4634: reward = 136.00, steps = 136\n",
      "00:05:35 [DEBUG] train episode 4635: reward = 177.00, steps = 177\n",
      "00:05:35 [DEBUG] train episode 4636: reward = 133.00, steps = 133\n",
      "00:05:35 [DEBUG] train episode 4637: reward = 140.00, steps = 140\n",
      "00:05:35 [DEBUG] train episode 4638: reward = 183.00, steps = 183\n",
      "00:05:35 [DEBUG] train episode 4639: reward = 171.00, steps = 171\n",
      "00:05:35 [DEBUG] train episode 4640: reward = 139.00, steps = 139\n",
      "00:05:35 [DEBUG] train episode 4641: reward = 151.00, steps = 151\n",
      "00:05:35 [DEBUG] train episode 4642: reward = 127.00, steps = 127\n",
      "00:05:35 [DEBUG] train episode 4643: reward = 152.00, steps = 152\n",
      "00:05:35 [DEBUG] train episode 4644: reward = 200.00, steps = 200\n",
      "00:05:35 [DEBUG] train episode 4645: reward = 162.00, steps = 162\n",
      "00:05:35 [DEBUG] train episode 4646: reward = 124.00, steps = 124\n",
      "00:05:35 [DEBUG] train episode 4647: reward = 176.00, steps = 176\n",
      "00:05:35 [DEBUG] train episode 4648: reward = 151.00, steps = 151\n",
      "00:05:35 [DEBUG] train episode 4649: reward = 154.00, steps = 154\n",
      "00:05:35 [DEBUG] train episode 4650: reward = 188.00, steps = 188\n",
      "00:05:36 [DEBUG] train episode 4651: reward = 184.00, steps = 184\n",
      "00:05:36 [DEBUG] train episode 4652: reward = 119.00, steps = 119\n",
      "00:05:36 [DEBUG] train episode 4653: reward = 176.00, steps = 176\n",
      "00:05:36 [DEBUG] train episode 4654: reward = 135.00, steps = 135\n",
      "00:05:36 [DEBUG] train episode 4655: reward = 139.00, steps = 139\n",
      "00:05:36 [DEBUG] train episode 4656: reward = 160.00, steps = 160\n",
      "00:05:36 [DEBUG] train episode 4657: reward = 149.00, steps = 149\n",
      "00:05:36 [DEBUG] train episode 4658: reward = 139.00, steps = 139\n",
      "00:05:36 [DEBUG] train episode 4659: reward = 200.00, steps = 200\n",
      "00:05:36 [DEBUG] train episode 4660: reward = 181.00, steps = 181\n",
      "00:05:36 [DEBUG] train episode 4661: reward = 175.00, steps = 175\n",
      "00:05:36 [DEBUG] train episode 4662: reward = 165.00, steps = 165\n",
      "00:05:36 [DEBUG] train episode 4663: reward = 176.00, steps = 176\n",
      "00:05:36 [DEBUG] train episode 4664: reward = 162.00, steps = 162\n",
      "00:05:36 [DEBUG] train episode 4665: reward = 200.00, steps = 200\n",
      "00:05:36 [DEBUG] train episode 4666: reward = 149.00, steps = 149\n",
      "00:05:36 [DEBUG] train episode 4667: reward = 129.00, steps = 129\n",
      "00:05:36 [DEBUG] train episode 4668: reward = 193.00, steps = 193\n",
      "00:05:36 [DEBUG] train episode 4669: reward = 200.00, steps = 200\n",
      "00:05:36 [DEBUG] train episode 4670: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4671: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4672: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4673: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4674: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4675: reward = 170.00, steps = 170\n",
      "00:05:37 [DEBUG] train episode 4676: reward = 152.00, steps = 152\n",
      "00:05:37 [DEBUG] train episode 4677: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4678: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4679: reward = 163.00, steps = 163\n",
      "00:05:37 [DEBUG] train episode 4680: reward = 199.00, steps = 199\n",
      "00:05:37 [DEBUG] train episode 4681: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4682: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4683: reward = 150.00, steps = 150\n",
      "00:05:37 [DEBUG] train episode 4684: reward = 178.00, steps = 178\n",
      "00:05:37 [DEBUG] train episode 4685: reward = 200.00, steps = 200\n",
      "00:05:37 [DEBUG] train episode 4686: reward = 148.00, steps = 148\n",
      "00:05:37 [DEBUG] train episode 4687: reward = 173.00, steps = 173\n",
      "00:05:37 [DEBUG] train episode 4688: reward = 171.00, steps = 171\n",
      "00:05:38 [DEBUG] train episode 4689: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4690: reward = 194.00, steps = 194\n",
      "00:05:38 [DEBUG] train episode 4691: reward = 168.00, steps = 168\n",
      "00:05:38 [DEBUG] train episode 4692: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4693: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4694: reward = 170.00, steps = 170\n",
      "00:05:38 [DEBUG] train episode 4695: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4696: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4697: reward = 179.00, steps = 179\n",
      "00:05:38 [DEBUG] train episode 4698: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4699: reward = 168.00, steps = 168\n",
      "00:05:38 [DEBUG] train episode 4700: reward = 150.00, steps = 150\n",
      "00:05:38 [DEBUG] train episode 4701: reward = 186.00, steps = 186\n",
      "00:05:38 [DEBUG] train episode 4702: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4703: reward = 200.00, steps = 200\n",
      "00:05:38 [DEBUG] train episode 4704: reward = 146.00, steps = 146\n",
      "00:05:38 [DEBUG] train episode 4705: reward = 159.00, steps = 159\n",
      "00:05:38 [DEBUG] train episode 4706: reward = 184.00, steps = 184\n",
      "00:05:39 [DEBUG] train episode 4707: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4708: reward = 174.00, steps = 174\n",
      "00:05:39 [DEBUG] train episode 4709: reward = 155.00, steps = 155\n",
      "00:05:39 [DEBUG] train episode 4710: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4711: reward = 153.00, steps = 153\n",
      "00:05:39 [DEBUG] train episode 4712: reward = 130.00, steps = 130\n",
      "00:05:39 [DEBUG] train episode 4713: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4714: reward = 188.00, steps = 188\n",
      "00:05:39 [DEBUG] train episode 4715: reward = 180.00, steps = 180\n",
      "00:05:39 [DEBUG] train episode 4716: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4717: reward = 183.00, steps = 183\n",
      "00:05:39 [DEBUG] train episode 4718: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4719: reward = 174.00, steps = 174\n",
      "00:05:39 [DEBUG] train episode 4720: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4721: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4722: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4723: reward = 200.00, steps = 200\n",
      "00:05:39 [DEBUG] train episode 4724: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4725: reward = 174.00, steps = 174\n",
      "00:05:40 [DEBUG] train episode 4726: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4727: reward = 190.00, steps = 190\n",
      "00:05:40 [DEBUG] train episode 4728: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4729: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4730: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4731: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4732: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4733: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4734: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4735: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4736: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4737: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4738: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4739: reward = 200.00, steps = 200\n",
      "00:05:40 [DEBUG] train episode 4740: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4741: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4742: reward = 198.00, steps = 198\n",
      "00:05:41 [DEBUG] train episode 4743: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4744: reward = 177.00, steps = 177\n",
      "00:05:41 [DEBUG] train episode 4745: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4746: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4747: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4748: reward = 178.00, steps = 178\n",
      "00:05:41 [DEBUG] train episode 4749: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4750: reward = 196.00, steps = 196\n",
      "00:05:41 [DEBUG] train episode 4751: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4752: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4753: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4754: reward = 179.00, steps = 179\n",
      "00:05:41 [DEBUG] train episode 4755: reward = 200.00, steps = 200\n",
      "00:05:41 [DEBUG] train episode 4756: reward = 180.00, steps = 180\n",
      "00:05:41 [DEBUG] train episode 4757: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4758: reward = 153.00, steps = 153\n",
      "00:05:42 [DEBUG] train episode 4759: reward = 157.00, steps = 157\n",
      "00:05:42 [DEBUG] train episode 4760: reward = 156.00, steps = 156\n",
      "00:05:42 [DEBUG] train episode 4761: reward = 178.00, steps = 178\n",
      "00:05:42 [DEBUG] train episode 4762: reward = 154.00, steps = 154\n",
      "00:05:42 [DEBUG] train episode 4763: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4764: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4765: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4766: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4767: reward = 182.00, steps = 182\n",
      "00:05:42 [DEBUG] train episode 4768: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4769: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4770: reward = 200.00, steps = 200\n",
      "00:05:42 [DEBUG] train episode 4771: reward = 193.00, steps = 193\n",
      "00:05:42 [DEBUG] train episode 4772: reward = 181.00, steps = 181\n",
      "00:05:42 [DEBUG] train episode 4773: reward = 196.00, steps = 196\n",
      "00:05:42 [DEBUG] train episode 4774: reward = 166.00, steps = 166\n",
      "00:05:43 [DEBUG] train episode 4775: reward = 195.00, steps = 195\n",
      "00:05:43 [DEBUG] train episode 4776: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4777: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4778: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4779: reward = 163.00, steps = 163\n",
      "00:05:43 [DEBUG] train episode 4780: reward = 176.00, steps = 176\n",
      "00:05:43 [DEBUG] train episode 4781: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4782: reward = 186.00, steps = 186\n",
      "00:05:43 [DEBUG] train episode 4783: reward = 139.00, steps = 139\n",
      "00:05:43 [DEBUG] train episode 4784: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4785: reward = 158.00, steps = 158\n",
      "00:05:43 [DEBUG] train episode 4786: reward = 182.00, steps = 182\n",
      "00:05:43 [DEBUG] train episode 4787: reward = 183.00, steps = 183\n",
      "00:05:43 [DEBUG] train episode 4788: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4789: reward = 173.00, steps = 173\n",
      "00:05:43 [DEBUG] train episode 4790: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4791: reward = 172.00, steps = 172\n",
      "00:05:43 [DEBUG] train episode 4792: reward = 200.00, steps = 200\n",
      "00:05:43 [DEBUG] train episode 4793: reward = 172.00, steps = 172\n",
      "00:05:44 [DEBUG] train episode 4794: reward = 187.00, steps = 187\n",
      "00:05:44 [DEBUG] train episode 4795: reward = 194.00, steps = 194\n",
      "00:05:44 [DEBUG] train episode 4796: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4797: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4798: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4799: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4800: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4801: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4802: reward = 170.00, steps = 170\n",
      "00:05:44 [DEBUG] train episode 4803: reward = 178.00, steps = 178\n",
      "00:05:44 [DEBUG] train episode 4804: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4805: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4806: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4807: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4808: reward = 200.00, steps = 200\n",
      "00:05:44 [DEBUG] train episode 4809: reward = 181.00, steps = 181\n",
      "00:05:44 [DEBUG] train episode 4810: reward = 200.00, steps = 200\n",
      "00:05:45 [DEBUG] train episode 4811: reward = 155.00, steps = 155\n",
      "00:05:45 [DEBUG] train episode 4812: reward = 200.00, steps = 200\n",
      "00:05:45 [DEBUG] train episode 4813: reward = 130.00, steps = 130\n",
      "00:05:45 [DEBUG] train episode 4814: reward = 200.00, steps = 200\n",
      "00:05:45 [DEBUG] train episode 4815: reward = 146.00, steps = 146\n",
      "00:05:45 [DEBUG] train episode 4816: reward = 196.00, steps = 196\n",
      "00:05:45 [DEBUG] train episode 4817: reward = 198.00, steps = 198\n",
      "00:05:45 [DEBUG] train episode 4818: reward = 178.00, steps = 178\n",
      "00:05:45 [DEBUG] train episode 4819: reward = 200.00, steps = 200\n",
      "00:05:45 [DEBUG] train episode 4820: reward = 168.00, steps = 168\n",
      "00:05:45 [DEBUG] train episode 4821: reward = 200.00, steps = 200\n",
      "00:05:45 [DEBUG] train episode 4822: reward = 178.00, steps = 178\n",
      "00:05:45 [DEBUG] train episode 4823: reward = 189.00, steps = 189\n",
      "00:05:45 [DEBUG] train episode 4824: reward = 198.00, steps = 198\n",
      "00:05:45 [DEBUG] train episode 4825: reward = 170.00, steps = 170\n",
      "00:05:45 [DEBUG] train episode 4826: reward = 178.00, steps = 178\n",
      "00:05:45 [DEBUG] train episode 4827: reward = 149.00, steps = 149\n",
      "00:05:45 [DEBUG] train episode 4828: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4829: reward = 179.00, steps = 179\n",
      "00:05:46 [DEBUG] train episode 4830: reward = 177.00, steps = 177\n",
      "00:05:46 [DEBUG] train episode 4831: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4832: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4833: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4834: reward = 180.00, steps = 180\n",
      "00:05:46 [DEBUG] train episode 4835: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4836: reward = 176.00, steps = 176\n",
      "00:05:46 [DEBUG] train episode 4837: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4838: reward = 141.00, steps = 141\n",
      "00:05:46 [DEBUG] train episode 4839: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4840: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4841: reward = 167.00, steps = 167\n",
      "00:05:46 [DEBUG] train episode 4842: reward = 174.00, steps = 174\n",
      "00:05:46 [DEBUG] train episode 4843: reward = 200.00, steps = 200\n",
      "00:05:46 [DEBUG] train episode 4844: reward = 194.00, steps = 194\n",
      "00:05:46 [DEBUG] train episode 4845: reward = 200.00, steps = 200\n",
      "00:05:47 [DEBUG] train episode 4846: reward = 167.00, steps = 167\n",
      "00:05:47 [DEBUG] train episode 4847: reward = 173.00, steps = 173\n",
      "00:05:47 [DEBUG] train episode 4848: reward = 136.00, steps = 136\n",
      "00:05:47 [DEBUG] train episode 4849: reward = 181.00, steps = 181\n",
      "00:05:47 [DEBUG] train episode 4850: reward = 172.00, steps = 172\n",
      "00:05:47 [DEBUG] train episode 4851: reward = 161.00, steps = 161\n",
      "00:05:47 [DEBUG] train episode 4852: reward = 159.00, steps = 159\n",
      "00:05:47 [DEBUG] train episode 4853: reward = 200.00, steps = 200\n",
      "00:05:47 [DEBUG] train episode 4854: reward = 146.00, steps = 146\n",
      "00:05:47 [DEBUG] train episode 4855: reward = 128.00, steps = 128\n",
      "00:05:47 [DEBUG] train episode 4856: reward = 136.00, steps = 136\n",
      "00:05:47 [DEBUG] train episode 4857: reward = 149.00, steps = 149\n",
      "00:05:47 [DEBUG] train episode 4858: reward = 200.00, steps = 200\n",
      "00:05:47 [DEBUG] train episode 4859: reward = 200.00, steps = 200\n",
      "00:05:47 [DEBUG] train episode 4860: reward = 130.00, steps = 130\n",
      "00:05:47 [DEBUG] train episode 4861: reward = 191.00, steps = 191\n",
      "00:05:47 [DEBUG] train episode 4862: reward = 140.00, steps = 140\n",
      "00:05:47 [DEBUG] train episode 4863: reward = 200.00, steps = 200\n",
      "00:05:47 [DEBUG] train episode 4864: reward = 167.00, steps = 167\n",
      "00:05:47 [DEBUG] train episode 4865: reward = 168.00, steps = 168\n",
      "00:05:48 [DEBUG] train episode 4866: reward = 137.00, steps = 137\n",
      "00:05:48 [DEBUG] train episode 4867: reward = 115.00, steps = 115\n",
      "00:05:48 [DEBUG] train episode 4868: reward = 136.00, steps = 136\n",
      "00:05:48 [DEBUG] train episode 4869: reward = 177.00, steps = 177\n",
      "00:05:48 [DEBUG] train episode 4870: reward = 174.00, steps = 174\n",
      "00:05:48 [DEBUG] train episode 4871: reward = 122.00, steps = 122\n",
      "00:05:48 [DEBUG] train episode 4872: reward = 200.00, steps = 200\n",
      "00:05:48 [DEBUG] train episode 4873: reward = 200.00, steps = 200\n",
      "00:05:48 [DEBUG] train episode 4874: reward = 124.00, steps = 124\n",
      "00:05:48 [DEBUG] train episode 4875: reward = 187.00, steps = 187\n",
      "00:05:48 [DEBUG] train episode 4876: reward = 138.00, steps = 138\n",
      "00:05:48 [DEBUG] train episode 4877: reward = 154.00, steps = 154\n",
      "00:05:48 [DEBUG] train episode 4878: reward = 122.00, steps = 122\n",
      "00:05:48 [DEBUG] train episode 4879: reward = 138.00, steps = 138\n",
      "00:05:48 [DEBUG] train episode 4880: reward = 125.00, steps = 125\n",
      "00:05:48 [DEBUG] train episode 4881: reward = 124.00, steps = 124\n",
      "00:05:48 [DEBUG] train episode 4882: reward = 200.00, steps = 200\n",
      "00:05:48 [DEBUG] train episode 4883: reward = 161.00, steps = 161\n",
      "00:05:48 [DEBUG] train episode 4884: reward = 164.00, steps = 164\n",
      "00:05:48 [DEBUG] train episode 4885: reward = 120.00, steps = 120\n",
      "00:05:49 [DEBUG] train episode 4886: reward = 200.00, steps = 200\n",
      "00:05:49 [DEBUG] train episode 4887: reward = 127.00, steps = 127\n",
      "00:05:49 [DEBUG] train episode 4888: reward = 139.00, steps = 139\n",
      "00:05:49 [DEBUG] train episode 4889: reward = 177.00, steps = 177\n",
      "00:05:49 [DEBUG] train episode 4890: reward = 166.00, steps = 166\n",
      "00:05:49 [DEBUG] train episode 4891: reward = 151.00, steps = 151\n",
      "00:05:49 [DEBUG] train episode 4892: reward = 184.00, steps = 184\n",
      "00:05:49 [DEBUG] train episode 4893: reward = 143.00, steps = 143\n",
      "00:05:49 [DEBUG] train episode 4894: reward = 200.00, steps = 200\n",
      "00:05:49 [DEBUG] train episode 4895: reward = 175.00, steps = 175\n",
      "00:05:49 [DEBUG] train episode 4896: reward = 200.00, steps = 200\n",
      "00:05:49 [DEBUG] train episode 4897: reward = 200.00, steps = 200\n",
      "00:05:49 [DEBUG] train episode 4898: reward = 130.00, steps = 130\n",
      "00:05:49 [DEBUG] train episode 4899: reward = 134.00, steps = 134\n",
      "00:05:49 [DEBUG] train episode 4900: reward = 181.00, steps = 181\n",
      "00:05:49 [DEBUG] train episode 4901: reward = 160.00, steps = 160\n",
      "00:05:49 [DEBUG] train episode 4902: reward = 196.00, steps = 196\n",
      "00:05:49 [DEBUG] train episode 4903: reward = 162.00, steps = 162\n",
      "00:05:49 [DEBUG] train episode 4904: reward = 170.00, steps = 170\n",
      "00:05:49 [DEBUG] train episode 4905: reward = 187.00, steps = 187\n",
      "00:05:50 [DEBUG] train episode 4906: reward = 149.00, steps = 149\n",
      "00:05:50 [DEBUG] train episode 4907: reward = 171.00, steps = 171\n",
      "00:05:50 [DEBUG] train episode 4908: reward = 190.00, steps = 190\n",
      "00:05:50 [DEBUG] train episode 4909: reward = 158.00, steps = 158\n",
      "00:05:50 [DEBUG] train episode 4910: reward = 148.00, steps = 148\n",
      "00:05:50 [DEBUG] train episode 4911: reward = 197.00, steps = 197\n",
      "00:05:50 [DEBUG] train episode 4912: reward = 147.00, steps = 147\n",
      "00:05:50 [DEBUG] train episode 4913: reward = 130.00, steps = 130\n",
      "00:05:50 [DEBUG] train episode 4914: reward = 161.00, steps = 161\n",
      "00:05:50 [DEBUG] train episode 4915: reward = 185.00, steps = 185\n",
      "00:05:50 [DEBUG] train episode 4916: reward = 160.00, steps = 160\n",
      "00:05:50 [DEBUG] train episode 4917: reward = 132.00, steps = 132\n",
      "00:05:50 [DEBUG] train episode 4918: reward = 157.00, steps = 157\n",
      "00:05:50 [DEBUG] train episode 4919: reward = 142.00, steps = 142\n",
      "00:05:50 [DEBUG] train episode 4920: reward = 152.00, steps = 152\n",
      "00:05:50 [DEBUG] train episode 4921: reward = 166.00, steps = 166\n",
      "00:05:50 [DEBUG] train episode 4922: reward = 200.00, steps = 200\n",
      "00:05:50 [DEBUG] train episode 4923: reward = 178.00, steps = 178\n",
      "00:05:50 [DEBUG] train episode 4924: reward = 200.00, steps = 200\n",
      "00:05:50 [DEBUG] train episode 4925: reward = 200.00, steps = 200\n",
      "00:05:51 [DEBUG] train episode 4926: reward = 193.00, steps = 193\n",
      "00:05:51 [DEBUG] train episode 4927: reward = 174.00, steps = 174\n",
      "00:05:51 [DEBUG] train episode 4928: reward = 128.00, steps = 128\n",
      "00:05:51 [DEBUG] train episode 4929: reward = 198.00, steps = 198\n",
      "00:05:51 [DEBUG] train episode 4930: reward = 142.00, steps = 142\n",
      "00:05:51 [DEBUG] train episode 4931: reward = 200.00, steps = 200\n",
      "00:05:51 [DEBUG] train episode 4932: reward = 145.00, steps = 145\n",
      "00:05:51 [DEBUG] train episode 4933: reward = 168.00, steps = 168\n",
      "00:05:51 [DEBUG] train episode 4934: reward = 143.00, steps = 143\n",
      "00:05:51 [DEBUG] train episode 4935: reward = 142.00, steps = 142\n",
      "00:05:51 [DEBUG] train episode 4936: reward = 130.00, steps = 130\n",
      "00:05:51 [DEBUG] train episode 4937: reward = 170.00, steps = 170\n",
      "00:05:51 [DEBUG] train episode 4938: reward = 143.00, steps = 143\n",
      "00:05:51 [DEBUG] train episode 4939: reward = 131.00, steps = 131\n",
      "00:05:51 [DEBUG] train episode 4940: reward = 159.00, steps = 159\n",
      "00:05:51 [DEBUG] train episode 4941: reward = 168.00, steps = 168\n",
      "00:05:51 [DEBUG] train episode 4942: reward = 200.00, steps = 200\n",
      "00:05:51 [DEBUG] train episode 4943: reward = 172.00, steps = 172\n",
      "00:05:51 [DEBUG] train episode 4944: reward = 177.00, steps = 177\n",
      "00:05:51 [DEBUG] train episode 4945: reward = 200.00, steps = 200\n",
      "00:05:52 [DEBUG] train episode 4946: reward = 172.00, steps = 172\n",
      "00:05:52 [DEBUG] train episode 4947: reward = 200.00, steps = 200\n",
      "00:05:52 [DEBUG] train episode 4948: reward = 150.00, steps = 150\n",
      "00:05:52 [DEBUG] train episode 4949: reward = 166.00, steps = 166\n",
      "00:05:52 [DEBUG] train episode 4950: reward = 129.00, steps = 129\n",
      "00:05:52 [DEBUG] train episode 4951: reward = 152.00, steps = 152\n",
      "00:05:52 [DEBUG] train episode 4952: reward = 194.00, steps = 194\n",
      "00:05:52 [DEBUG] train episode 4953: reward = 176.00, steps = 176\n",
      "00:05:52 [DEBUG] train episode 4954: reward = 200.00, steps = 200\n",
      "00:05:52 [DEBUG] train episode 4955: reward = 200.00, steps = 200\n",
      "00:05:52 [DEBUG] train episode 4956: reward = 168.00, steps = 168\n",
      "00:05:52 [DEBUG] train episode 4957: reward = 187.00, steps = 187\n",
      "00:05:52 [DEBUG] train episode 4958: reward = 181.00, steps = 181\n",
      "00:05:52 [DEBUG] train episode 4959: reward = 134.00, steps = 134\n",
      "00:05:52 [DEBUG] train episode 4960: reward = 138.00, steps = 138\n",
      "00:05:52 [DEBUG] train episode 4961: reward = 125.00, steps = 125\n",
      "00:05:52 [DEBUG] train episode 4962: reward = 162.00, steps = 162\n",
      "00:05:52 [DEBUG] train episode 4963: reward = 200.00, steps = 200\n",
      "00:05:52 [DEBUG] train episode 4964: reward = 162.00, steps = 162\n",
      "00:05:53 [DEBUG] train episode 4965: reward = 192.00, steps = 192\n",
      "00:05:53 [DEBUG] train episode 4966: reward = 141.00, steps = 141\n",
      "00:05:53 [DEBUG] train episode 4967: reward = 181.00, steps = 181\n",
      "00:05:53 [DEBUG] train episode 4968: reward = 153.00, steps = 153\n",
      "00:05:53 [DEBUG] train episode 4969: reward = 200.00, steps = 200\n",
      "00:05:53 [DEBUG] train episode 4970: reward = 149.00, steps = 149\n",
      "00:05:53 [DEBUG] train episode 4971: reward = 172.00, steps = 172\n",
      "00:05:53 [DEBUG] train episode 4972: reward = 200.00, steps = 200\n",
      "00:05:53 [DEBUG] train episode 4973: reward = 174.00, steps = 174\n",
      "00:05:53 [DEBUG] train episode 4974: reward = 149.00, steps = 149\n",
      "00:05:53 [DEBUG] train episode 4975: reward = 200.00, steps = 200\n",
      "00:05:53 [DEBUG] train episode 4976: reward = 140.00, steps = 140\n",
      "00:05:53 [DEBUG] train episode 4977: reward = 153.00, steps = 153\n",
      "00:05:53 [DEBUG] train episode 4978: reward = 183.00, steps = 183\n",
      "00:05:53 [DEBUG] train episode 4979: reward = 161.00, steps = 161\n",
      "00:05:53 [DEBUG] train episode 4980: reward = 148.00, steps = 148\n",
      "00:05:53 [DEBUG] train episode 4981: reward = 174.00, steps = 174\n",
      "00:05:53 [DEBUG] train episode 4982: reward = 200.00, steps = 200\n",
      "00:05:53 [DEBUG] train episode 4983: reward = 137.00, steps = 137\n",
      "00:05:54 [DEBUG] train episode 4984: reward = 200.00, steps = 200\n",
      "00:05:54 [DEBUG] train episode 4985: reward = 149.00, steps = 149\n",
      "00:05:54 [DEBUG] train episode 4986: reward = 200.00, steps = 200\n",
      "00:05:54 [DEBUG] train episode 4987: reward = 158.00, steps = 158\n",
      "00:05:54 [DEBUG] train episode 4988: reward = 194.00, steps = 194\n",
      "00:05:54 [DEBUG] train episode 4989: reward = 191.00, steps = 191\n",
      "00:05:54 [DEBUG] train episode 4990: reward = 198.00, steps = 198\n",
      "00:05:54 [DEBUG] train episode 4991: reward = 200.00, steps = 200\n",
      "00:05:54 [DEBUG] train episode 4992: reward = 134.00, steps = 134\n",
      "00:05:54 [DEBUG] train episode 4993: reward = 135.00, steps = 135\n",
      "00:05:54 [DEBUG] train episode 4994: reward = 131.00, steps = 131\n",
      "00:05:54 [DEBUG] train episode 4995: reward = 167.00, steps = 167\n",
      "00:05:54 [DEBUG] train episode 4996: reward = 142.00, steps = 142\n",
      "00:05:54 [DEBUG] train episode 4997: reward = 200.00, steps = 200\n",
      "00:05:54 [DEBUG] train episode 4998: reward = 122.00, steps = 122\n",
      "00:05:54 [DEBUG] train episode 4999: reward = 191.00, steps = 191\n",
      "00:05:54 [DEBUG] train episode 5000: reward = 200.00, steps = 200\n",
      "00:05:54 [DEBUG] train episode 5001: reward = 199.00, steps = 199\n",
      "00:05:54 [DEBUG] train episode 5002: reward = 171.00, steps = 171\n",
      "00:05:55 [DEBUG] train episode 5003: reward = 151.00, steps = 151\n",
      "00:05:55 [DEBUG] train episode 5004: reward = 200.00, steps = 200\n",
      "00:05:55 [DEBUG] train episode 5005: reward = 200.00, steps = 200\n",
      "00:05:55 [DEBUG] train episode 5006: reward = 200.00, steps = 200\n",
      "00:05:55 [DEBUG] train episode 5007: reward = 139.00, steps = 139\n",
      "00:05:55 [DEBUG] train episode 5008: reward = 152.00, steps = 152\n",
      "00:05:55 [DEBUG] train episode 5009: reward = 146.00, steps = 146\n",
      "00:05:55 [DEBUG] train episode 5010: reward = 200.00, steps = 200\n",
      "00:05:55 [DEBUG] train episode 5011: reward = 135.00, steps = 135\n",
      "00:05:55 [DEBUG] train episode 5012: reward = 130.00, steps = 130\n",
      "00:05:55 [DEBUG] train episode 5013: reward = 200.00, steps = 200\n",
      "00:05:55 [DEBUG] train episode 5014: reward = 187.00, steps = 187\n",
      "00:05:55 [DEBUG] train episode 5015: reward = 200.00, steps = 200\n",
      "00:05:55 [DEBUG] train episode 5016: reward = 169.00, steps = 169\n",
      "00:05:55 [DEBUG] train episode 5017: reward = 124.00, steps = 124\n",
      "00:05:55 [DEBUG] train episode 5018: reward = 123.00, steps = 123\n",
      "00:05:55 [DEBUG] train episode 5019: reward = 139.00, steps = 139\n",
      "00:05:55 [DEBUG] train episode 5020: reward = 140.00, steps = 140\n",
      "00:05:55 [DEBUG] train episode 5021: reward = 163.00, steps = 163\n",
      "00:05:55 [DEBUG] train episode 5022: reward = 181.00, steps = 181\n",
      "00:05:56 [DEBUG] train episode 5023: reward = 156.00, steps = 156\n",
      "00:05:56 [DEBUG] train episode 5024: reward = 172.00, steps = 172\n",
      "00:05:56 [DEBUG] train episode 5025: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5026: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5027: reward = 150.00, steps = 150\n",
      "00:05:56 [DEBUG] train episode 5028: reward = 174.00, steps = 174\n",
      "00:05:56 [DEBUG] train episode 5029: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5030: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5031: reward = 169.00, steps = 169\n",
      "00:05:56 [DEBUG] train episode 5032: reward = 154.00, steps = 154\n",
      "00:05:56 [DEBUG] train episode 5033: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5034: reward = 180.00, steps = 180\n",
      "00:05:56 [DEBUG] train episode 5035: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5036: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5037: reward = 171.00, steps = 171\n",
      "00:05:56 [DEBUG] train episode 5038: reward = 200.00, steps = 200\n",
      "00:05:56 [DEBUG] train episode 5039: reward = 136.00, steps = 136\n",
      "00:05:56 [DEBUG] train episode 5040: reward = 152.00, steps = 152\n",
      "00:05:56 [DEBUG] train episode 5041: reward = 177.00, steps = 177\n",
      "00:05:57 [DEBUG] train episode 5042: reward = 199.00, steps = 199\n",
      "00:05:57 [DEBUG] train episode 5043: reward = 162.00, steps = 162\n",
      "00:05:57 [DEBUG] train episode 5044: reward = 174.00, steps = 174\n",
      "00:05:57 [DEBUG] train episode 5045: reward = 141.00, steps = 141\n",
      "00:05:57 [DEBUG] train episode 5046: reward = 167.00, steps = 167\n",
      "00:05:57 [DEBUG] train episode 5047: reward = 183.00, steps = 183\n",
      "00:05:57 [DEBUG] train episode 5048: reward = 200.00, steps = 200\n",
      "00:05:57 [DEBUG] train episode 5049: reward = 175.00, steps = 175\n",
      "00:05:57 [DEBUG] train episode 5050: reward = 170.00, steps = 170\n",
      "00:05:57 [DEBUG] train episode 5051: reward = 200.00, steps = 200\n",
      "00:05:57 [DEBUG] train episode 5052: reward = 153.00, steps = 153\n",
      "00:05:57 [DEBUG] train episode 5053: reward = 186.00, steps = 186\n",
      "00:05:57 [DEBUG] train episode 5054: reward = 178.00, steps = 178\n",
      "00:05:57 [DEBUG] train episode 5055: reward = 120.00, steps = 120\n",
      "00:05:57 [DEBUG] train episode 5056: reward = 200.00, steps = 200\n",
      "00:05:57 [DEBUG] train episode 5057: reward = 178.00, steps = 178\n",
      "00:05:57 [DEBUG] train episode 5058: reward = 200.00, steps = 200\n",
      "00:05:57 [DEBUG] train episode 5059: reward = 178.00, steps = 178\n",
      "00:05:57 [DEBUG] train episode 5060: reward = 144.00, steps = 144\n",
      "00:05:58 [DEBUG] train episode 5061: reward = 190.00, steps = 190\n",
      "00:05:58 [DEBUG] train episode 5062: reward = 200.00, steps = 200\n",
      "00:05:58 [DEBUG] train episode 5063: reward = 200.00, steps = 200\n",
      "00:05:58 [DEBUG] train episode 5064: reward = 200.00, steps = 200\n",
      "00:05:58 [DEBUG] train episode 5065: reward = 167.00, steps = 167\n",
      "00:05:58 [DEBUG] train episode 5066: reward = 160.00, steps = 160\n",
      "00:05:58 [DEBUG] train episode 5067: reward = 200.00, steps = 200\n",
      "00:05:58 [DEBUG] train episode 5068: reward = 134.00, steps = 134\n",
      "00:05:58 [DEBUG] train episode 5069: reward = 154.00, steps = 154\n",
      "00:05:58 [DEBUG] train episode 5070: reward = 176.00, steps = 176\n",
      "00:05:58 [DEBUG] train episode 5071: reward = 191.00, steps = 191\n",
      "00:05:58 [DEBUG] train episode 5072: reward = 183.00, steps = 183\n",
      "00:05:58 [DEBUG] train episode 5073: reward = 144.00, steps = 144\n",
      "00:05:58 [DEBUG] train episode 5074: reward = 136.00, steps = 136\n",
      "00:05:58 [DEBUG] train episode 5075: reward = 164.00, steps = 164\n",
      "00:05:58 [DEBUG] train episode 5076: reward = 160.00, steps = 160\n",
      "00:05:58 [DEBUG] train episode 5077: reward = 179.00, steps = 179\n",
      "00:05:58 [DEBUG] train episode 5078: reward = 196.00, steps = 196\n",
      "00:05:58 [DEBUG] train episode 5079: reward = 146.00, steps = 146\n",
      "00:05:59 [DEBUG] train episode 5080: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5081: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5082: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5083: reward = 191.00, steps = 191\n",
      "00:05:59 [DEBUG] train episode 5084: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5085: reward = 179.00, steps = 179\n",
      "00:05:59 [DEBUG] train episode 5086: reward = 175.00, steps = 175\n",
      "00:05:59 [DEBUG] train episode 5087: reward = 184.00, steps = 184\n",
      "00:05:59 [DEBUG] train episode 5088: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5089: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5090: reward = 154.00, steps = 154\n",
      "00:05:59 [DEBUG] train episode 5091: reward = 173.00, steps = 173\n",
      "00:05:59 [DEBUG] train episode 5092: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5093: reward = 159.00, steps = 159\n",
      "00:05:59 [DEBUG] train episode 5094: reward = 108.00, steps = 108\n",
      "00:05:59 [DEBUG] train episode 5095: reward = 194.00, steps = 194\n",
      "00:05:59 [DEBUG] train episode 5096: reward = 173.00, steps = 173\n",
      "00:05:59 [DEBUG] train episode 5097: reward = 200.00, steps = 200\n",
      "00:05:59 [DEBUG] train episode 5098: reward = 161.00, steps = 161\n",
      "00:06:00 [DEBUG] train episode 5099: reward = 166.00, steps = 166\n",
      "00:06:00 [DEBUG] train episode 5100: reward = 200.00, steps = 200\n",
      "00:06:00 [DEBUG] train episode 5101: reward = 200.00, steps = 200\n",
      "00:06:00 [DEBUG] train episode 5102: reward = 160.00, steps = 160\n",
      "00:06:00 [DEBUG] train episode 5103: reward = 168.00, steps = 168\n",
      "00:06:00 [DEBUG] train episode 5104: reward = 200.00, steps = 200\n",
      "00:06:00 [DEBUG] train episode 5105: reward = 123.00, steps = 123\n",
      "00:06:00 [DEBUG] train episode 5106: reward = 121.00, steps = 121\n",
      "00:06:00 [DEBUG] train episode 5107: reward = 158.00, steps = 158\n",
      "00:06:00 [DEBUG] train episode 5108: reward = 132.00, steps = 132\n",
      "00:06:00 [DEBUG] train episode 5109: reward = 193.00, steps = 193\n",
      "00:06:00 [DEBUG] train episode 5110: reward = 200.00, steps = 200\n",
      "00:06:00 [DEBUG] train episode 5111: reward = 200.00, steps = 200\n",
      "00:06:00 [DEBUG] train episode 5112: reward = 139.00, steps = 139\n",
      "00:06:00 [DEBUG] train episode 5113: reward = 162.00, steps = 162\n",
      "00:06:00 [DEBUG] train episode 5114: reward = 173.00, steps = 173\n",
      "00:06:00 [DEBUG] train episode 5115: reward = 200.00, steps = 200\n",
      "00:06:00 [DEBUG] train episode 5116: reward = 146.00, steps = 146\n",
      "00:06:00 [DEBUG] train episode 5117: reward = 132.00, steps = 132\n",
      "00:06:01 [DEBUG] train episode 5118: reward = 200.00, steps = 200\n",
      "00:06:01 [DEBUG] train episode 5119: reward = 137.00, steps = 137\n",
      "00:06:01 [DEBUG] train episode 5120: reward = 153.00, steps = 153\n",
      "00:06:01 [DEBUG] train episode 5121: reward = 188.00, steps = 188\n",
      "00:06:01 [DEBUG] train episode 5122: reward = 167.00, steps = 167\n",
      "00:06:01 [DEBUG] train episode 5123: reward = 157.00, steps = 157\n",
      "00:06:01 [DEBUG] train episode 5124: reward = 109.00, steps = 109\n",
      "00:06:01 [DEBUG] train episode 5125: reward = 123.00, steps = 123\n",
      "00:06:01 [DEBUG] train episode 5126: reward = 163.00, steps = 163\n",
      "00:06:01 [DEBUG] train episode 5127: reward = 131.00, steps = 131\n",
      "00:06:01 [DEBUG] train episode 5128: reward = 147.00, steps = 147\n",
      "00:06:01 [DEBUG] train episode 5129: reward = 136.00, steps = 136\n",
      "00:06:01 [DEBUG] train episode 5130: reward = 193.00, steps = 193\n",
      "00:06:01 [DEBUG] train episode 5131: reward = 125.00, steps = 125\n",
      "00:06:01 [DEBUG] train episode 5132: reward = 200.00, steps = 200\n",
      "00:06:01 [DEBUG] train episode 5133: reward = 122.00, steps = 122\n",
      "00:06:01 [DEBUG] train episode 5134: reward = 195.00, steps = 195\n",
      "00:06:01 [DEBUG] train episode 5135: reward = 147.00, steps = 147\n",
      "00:06:01 [DEBUG] train episode 5136: reward = 152.00, steps = 152\n",
      "00:06:01 [DEBUG] train episode 5137: reward = 168.00, steps = 168\n",
      "00:06:01 [DEBUG] train episode 5138: reward = 148.00, steps = 148\n",
      "00:06:01 [DEBUG] train episode 5139: reward = 180.00, steps = 180\n",
      "00:06:01 [DEBUG] train episode 5140: reward = 161.00, steps = 161\n",
      "00:06:02 [DEBUG] train episode 5141: reward = 200.00, steps = 200\n",
      "00:06:02 [DEBUG] train episode 5142: reward = 200.00, steps = 200\n",
      "00:06:02 [DEBUG] train episode 5143: reward = 200.00, steps = 200\n",
      "00:06:02 [DEBUG] train episode 5144: reward = 118.00, steps = 118\n",
      "00:06:02 [DEBUG] train episode 5145: reward = 154.00, steps = 154\n",
      "00:06:02 [DEBUG] train episode 5146: reward = 152.00, steps = 152\n",
      "00:06:02 [DEBUG] train episode 5147: reward = 187.00, steps = 187\n",
      "00:06:02 [DEBUG] train episode 5148: reward = 200.00, steps = 200\n",
      "00:06:02 [DEBUG] train episode 5149: reward = 200.00, steps = 200\n",
      "00:06:02 [DEBUG] train episode 5150: reward = 200.00, steps = 200\n",
      "00:06:02 [DEBUG] train episode 5151: reward = 173.00, steps = 173\n",
      "00:06:02 [DEBUG] train episode 5152: reward = 187.00, steps = 187\n",
      "00:06:02 [DEBUG] train episode 5153: reward = 168.00, steps = 168\n",
      "00:06:02 [DEBUG] train episode 5154: reward = 193.00, steps = 193\n",
      "00:06:02 [DEBUG] train episode 5155: reward = 152.00, steps = 152\n",
      "00:06:02 [DEBUG] train episode 5156: reward = 200.00, steps = 200\n",
      "00:06:02 [DEBUG] train episode 5157: reward = 148.00, steps = 148\n",
      "00:06:02 [DEBUG] train episode 5158: reward = 139.00, steps = 139\n",
      "00:06:02 [DEBUG] train episode 5159: reward = 200.00, steps = 200\n",
      "00:06:03 [DEBUG] train episode 5160: reward = 180.00, steps = 180\n",
      "00:06:03 [DEBUG] train episode 5161: reward = 168.00, steps = 168\n",
      "00:06:03 [DEBUG] train episode 5162: reward = 134.00, steps = 134\n",
      "00:06:03 [DEBUG] train episode 5163: reward = 126.00, steps = 126\n",
      "00:06:03 [DEBUG] train episode 5164: reward = 170.00, steps = 170\n",
      "00:06:03 [DEBUG] train episode 5165: reward = 200.00, steps = 200\n",
      "00:06:03 [DEBUG] train episode 5166: reward = 200.00, steps = 200\n",
      "00:06:03 [DEBUG] train episode 5167: reward = 133.00, steps = 133\n",
      "00:06:03 [DEBUG] train episode 5168: reward = 175.00, steps = 175\n",
      "00:06:03 [DEBUG] train episode 5169: reward = 161.00, steps = 161\n",
      "00:06:03 [DEBUG] train episode 5170: reward = 200.00, steps = 200\n",
      "00:06:03 [DEBUG] train episode 5171: reward = 150.00, steps = 150\n",
      "00:06:03 [DEBUG] train episode 5172: reward = 152.00, steps = 152\n",
      "00:06:03 [DEBUG] train episode 5173: reward = 200.00, steps = 200\n",
      "00:06:03 [DEBUG] train episode 5174: reward = 200.00, steps = 200\n",
      "00:06:03 [DEBUG] train episode 5175: reward = 146.00, steps = 146\n",
      "00:06:03 [DEBUG] train episode 5176: reward = 196.00, steps = 196\n",
      "00:06:03 [DEBUG] train episode 5177: reward = 161.00, steps = 161\n",
      "00:06:03 [DEBUG] train episode 5178: reward = 136.00, steps = 136\n",
      "00:06:03 [DEBUG] train episode 5179: reward = 165.00, steps = 165\n",
      "00:06:04 [DEBUG] train episode 5180: reward = 154.00, steps = 154\n",
      "00:06:04 [DEBUG] train episode 5181: reward = 144.00, steps = 144\n",
      "00:06:04 [DEBUG] train episode 5182: reward = 200.00, steps = 200\n",
      "00:06:04 [DEBUG] train episode 5183: reward = 167.00, steps = 167\n",
      "00:06:04 [DEBUG] train episode 5184: reward = 156.00, steps = 156\n",
      "00:06:04 [DEBUG] train episode 5185: reward = 149.00, steps = 149\n",
      "00:06:04 [DEBUG] train episode 5186: reward = 200.00, steps = 200\n",
      "00:06:04 [DEBUG] train episode 5187: reward = 200.00, steps = 200\n",
      "00:06:04 [DEBUG] train episode 5188: reward = 184.00, steps = 184\n",
      "00:06:04 [DEBUG] train episode 5189: reward = 140.00, steps = 140\n",
      "00:06:04 [DEBUG] train episode 5190: reward = 176.00, steps = 176\n",
      "00:06:04 [DEBUG] train episode 5191: reward = 154.00, steps = 154\n",
      "00:06:04 [DEBUG] train episode 5192: reward = 190.00, steps = 190\n",
      "00:06:04 [DEBUG] train episode 5193: reward = 160.00, steps = 160\n",
      "00:06:04 [DEBUG] train episode 5194: reward = 188.00, steps = 188\n",
      "00:06:04 [DEBUG] train episode 5195: reward = 159.00, steps = 159\n",
      "00:06:04 [DEBUG] train episode 5196: reward = 185.00, steps = 185\n",
      "00:06:04 [DEBUG] train episode 5197: reward = 174.00, steps = 174\n",
      "00:06:04 [DEBUG] train episode 5198: reward = 189.00, steps = 189\n",
      "00:06:04 [DEBUG] train episode 5199: reward = 174.00, steps = 174\n",
      "00:06:05 [DEBUG] train episode 5200: reward = 191.00, steps = 191\n",
      "00:06:05 [DEBUG] train episode 5201: reward = 167.00, steps = 167\n",
      "00:06:05 [DEBUG] train episode 5202: reward = 146.00, steps = 146\n",
      "00:06:05 [DEBUG] train episode 5203: reward = 178.00, steps = 178\n",
      "00:06:05 [DEBUG] train episode 5204: reward = 135.00, steps = 135\n",
      "00:06:05 [DEBUG] train episode 5205: reward = 142.00, steps = 142\n",
      "00:06:05 [DEBUG] train episode 5206: reward = 186.00, steps = 186\n",
      "00:06:05 [DEBUG] train episode 5207: reward = 188.00, steps = 188\n",
      "00:06:05 [DEBUG] train episode 5208: reward = 179.00, steps = 179\n",
      "00:06:05 [DEBUG] train episode 5209: reward = 200.00, steps = 200\n",
      "00:06:05 [DEBUG] train episode 5210: reward = 200.00, steps = 200\n",
      "00:06:05 [DEBUG] train episode 5211: reward = 200.00, steps = 200\n",
      "00:06:05 [DEBUG] train episode 5212: reward = 138.00, steps = 138\n",
      "00:06:05 [DEBUG] train episode 5213: reward = 142.00, steps = 142\n",
      "00:06:05 [DEBUG] train episode 5214: reward = 153.00, steps = 153\n",
      "00:06:05 [DEBUG] train episode 5215: reward = 158.00, steps = 158\n",
      "00:06:05 [DEBUG] train episode 5216: reward = 200.00, steps = 200\n",
      "00:06:05 [DEBUG] train episode 5217: reward = 200.00, steps = 200\n",
      "00:06:05 [DEBUG] train episode 5218: reward = 180.00, steps = 180\n",
      "00:06:05 [DEBUG] train episode 5219: reward = 157.00, steps = 157\n",
      "00:06:06 [DEBUG] train episode 5220: reward = 145.00, steps = 145\n",
      "00:06:06 [DEBUG] train episode 5221: reward = 200.00, steps = 200\n",
      "00:06:06 [DEBUG] train episode 5222: reward = 183.00, steps = 183\n",
      "00:06:06 [DEBUG] train episode 5223: reward = 200.00, steps = 200\n",
      "00:06:06 [DEBUG] train episode 5224: reward = 171.00, steps = 171\n",
      "00:06:06 [DEBUG] train episode 5225: reward = 172.00, steps = 172\n",
      "00:06:06 [DEBUG] train episode 5226: reward = 168.00, steps = 168\n",
      "00:06:06 [DEBUG] train episode 5227: reward = 154.00, steps = 154\n",
      "00:06:06 [DEBUG] train episode 5228: reward = 160.00, steps = 160\n",
      "00:06:06 [DEBUG] train episode 5229: reward = 200.00, steps = 200\n",
      "00:06:06 [DEBUG] train episode 5230: reward = 141.00, steps = 141\n",
      "00:06:06 [DEBUG] train episode 5231: reward = 133.00, steps = 133\n",
      "00:06:06 [DEBUG] train episode 5232: reward = 167.00, steps = 167\n",
      "00:06:06 [DEBUG] train episode 5233: reward = 132.00, steps = 132\n",
      "00:06:06 [DEBUG] train episode 5234: reward = 200.00, steps = 200\n",
      "00:06:06 [DEBUG] train episode 5235: reward = 170.00, steps = 170\n",
      "00:06:06 [DEBUG] train episode 5236: reward = 177.00, steps = 177\n",
      "00:06:06 [DEBUG] train episode 5237: reward = 200.00, steps = 200\n",
      "00:06:06 [DEBUG] train episode 5238: reward = 200.00, steps = 200\n",
      "00:06:06 [DEBUG] train episode 5239: reward = 188.00, steps = 188\n",
      "00:06:07 [DEBUG] train episode 5240: reward = 200.00, steps = 200\n",
      "00:06:07 [DEBUG] train episode 5241: reward = 140.00, steps = 140\n",
      "00:06:07 [DEBUG] train episode 5242: reward = 139.00, steps = 139\n",
      "00:06:07 [DEBUG] train episode 5243: reward = 192.00, steps = 192\n",
      "00:06:07 [DEBUG] train episode 5244: reward = 182.00, steps = 182\n",
      "00:06:07 [DEBUG] train episode 5245: reward = 125.00, steps = 125\n",
      "00:06:07 [DEBUG] train episode 5246: reward = 150.00, steps = 150\n",
      "00:06:07 [DEBUG] train episode 5247: reward = 112.00, steps = 112\n",
      "00:06:07 [DEBUG] train episode 5248: reward = 169.00, steps = 169\n",
      "00:06:07 [DEBUG] train episode 5249: reward = 113.00, steps = 113\n",
      "00:06:07 [DEBUG] train episode 5250: reward = 193.00, steps = 193\n",
      "00:06:07 [DEBUG] train episode 5251: reward = 121.00, steps = 121\n",
      "00:06:07 [DEBUG] train episode 5252: reward = 135.00, steps = 135\n",
      "00:06:07 [DEBUG] train episode 5253: reward = 172.00, steps = 172\n",
      "00:06:07 [DEBUG] train episode 5254: reward = 145.00, steps = 145\n",
      "00:06:07 [DEBUG] train episode 5255: reward = 155.00, steps = 155\n",
      "00:06:07 [DEBUG] train episode 5256: reward = 200.00, steps = 200\n",
      "00:06:07 [DEBUG] train episode 5257: reward = 109.00, steps = 109\n",
      "00:06:07 [DEBUG] train episode 5258: reward = 174.00, steps = 174\n",
      "00:06:07 [DEBUG] train episode 5259: reward = 200.00, steps = 200\n",
      "00:06:07 [DEBUG] train episode 5260: reward = 117.00, steps = 117\n",
      "00:06:07 [DEBUG] train episode 5261: reward = 194.00, steps = 194\n",
      "00:06:08 [DEBUG] train episode 5262: reward = 146.00, steps = 146\n",
      "00:06:08 [DEBUG] train episode 5263: reward = 184.00, steps = 184\n",
      "00:06:08 [DEBUG] train episode 5264: reward = 148.00, steps = 148\n",
      "00:06:08 [DEBUG] train episode 5265: reward = 150.00, steps = 150\n",
      "00:06:08 [DEBUG] train episode 5266: reward = 120.00, steps = 120\n",
      "00:06:08 [DEBUG] train episode 5267: reward = 154.00, steps = 154\n",
      "00:06:08 [DEBUG] train episode 5268: reward = 186.00, steps = 186\n",
      "00:06:08 [DEBUG] train episode 5269: reward = 173.00, steps = 173\n",
      "00:06:08 [DEBUG] train episode 5270: reward = 139.00, steps = 139\n",
      "00:06:08 [DEBUG] train episode 5271: reward = 200.00, steps = 200\n",
      "00:06:08 [DEBUG] train episode 5272: reward = 166.00, steps = 166\n",
      "00:06:08 [DEBUG] train episode 5273: reward = 200.00, steps = 200\n",
      "00:06:08 [DEBUG] train episode 5274: reward = 200.00, steps = 200\n",
      "00:06:08 [DEBUG] train episode 5275: reward = 200.00, steps = 200\n",
      "00:06:08 [DEBUG] train episode 5276: reward = 156.00, steps = 156\n",
      "00:06:08 [DEBUG] train episode 5277: reward = 130.00, steps = 130\n",
      "00:06:08 [DEBUG] train episode 5278: reward = 148.00, steps = 148\n",
      "00:06:08 [DEBUG] train episode 5279: reward = 200.00, steps = 200\n",
      "00:06:08 [DEBUG] train episode 5280: reward = 184.00, steps = 184\n",
      "00:06:08 [DEBUG] train episode 5281: reward = 200.00, steps = 200\n",
      "00:06:09 [DEBUG] train episode 5282: reward = 159.00, steps = 159\n",
      "00:06:09 [DEBUG] train episode 5283: reward = 140.00, steps = 140\n",
      "00:06:09 [DEBUG] train episode 5284: reward = 181.00, steps = 181\n",
      "00:06:09 [DEBUG] train episode 5285: reward = 155.00, steps = 155\n",
      "00:06:09 [DEBUG] train episode 5286: reward = 124.00, steps = 124\n",
      "00:06:09 [DEBUG] train episode 5287: reward = 160.00, steps = 160\n",
      "00:06:09 [DEBUG] train episode 5288: reward = 116.00, steps = 116\n",
      "00:06:09 [DEBUG] train episode 5289: reward = 123.00, steps = 123\n",
      "00:06:09 [DEBUG] train episode 5290: reward = 167.00, steps = 167\n",
      "00:06:09 [DEBUG] train episode 5291: reward = 133.00, steps = 133\n",
      "00:06:09 [DEBUG] train episode 5292: reward = 172.00, steps = 172\n",
      "00:06:09 [DEBUG] train episode 5293: reward = 134.00, steps = 134\n",
      "00:06:09 [DEBUG] train episode 5294: reward = 163.00, steps = 163\n",
      "00:06:09 [DEBUG] train episode 5295: reward = 134.00, steps = 134\n",
      "00:06:09 [DEBUG] train episode 5296: reward = 200.00, steps = 200\n",
      "00:06:09 [DEBUG] train episode 5297: reward = 132.00, steps = 132\n",
      "00:06:09 [DEBUG] train episode 5298: reward = 138.00, steps = 138\n",
      "00:06:09 [DEBUG] train episode 5299: reward = 129.00, steps = 129\n",
      "00:06:09 [DEBUG] train episode 5300: reward = 129.00, steps = 129\n",
      "00:06:09 [DEBUG] train episode 5301: reward = 128.00, steps = 128\n",
      "00:06:09 [DEBUG] train episode 5302: reward = 178.00, steps = 178\n",
      "00:06:09 [DEBUG] train episode 5303: reward = 174.00, steps = 174\n",
      "00:06:10 [DEBUG] train episode 5304: reward = 162.00, steps = 162\n",
      "00:06:10 [DEBUG] train episode 5305: reward = 155.00, steps = 155\n",
      "00:06:10 [DEBUG] train episode 5306: reward = 145.00, steps = 145\n",
      "00:06:10 [DEBUG] train episode 5307: reward = 111.00, steps = 111\n",
      "00:06:10 [DEBUG] train episode 5308: reward = 143.00, steps = 143\n",
      "00:06:10 [DEBUG] train episode 5309: reward = 188.00, steps = 188\n",
      "00:06:10 [DEBUG] train episode 5310: reward = 200.00, steps = 200\n",
      "00:06:10 [DEBUG] train episode 5311: reward = 187.00, steps = 187\n",
      "00:06:10 [DEBUG] train episode 5312: reward = 130.00, steps = 130\n",
      "00:06:10 [DEBUG] train episode 5313: reward = 116.00, steps = 116\n",
      "00:06:10 [DEBUG] train episode 5314: reward = 184.00, steps = 184\n",
      "00:06:10 [DEBUG] train episode 5315: reward = 190.00, steps = 190\n",
      "00:06:10 [DEBUG] train episode 5316: reward = 145.00, steps = 145\n",
      "00:06:10 [DEBUG] train episode 5317: reward = 160.00, steps = 160\n",
      "00:06:10 [DEBUG] train episode 5318: reward = 200.00, steps = 200\n",
      "00:06:10 [DEBUG] train episode 5319: reward = 200.00, steps = 200\n",
      "00:06:10 [DEBUG] train episode 5320: reward = 149.00, steps = 149\n",
      "00:06:10 [DEBUG] train episode 5321: reward = 119.00, steps = 119\n",
      "00:06:10 [DEBUG] train episode 5322: reward = 136.00, steps = 136\n",
      "00:06:10 [DEBUG] train episode 5323: reward = 116.00, steps = 116\n",
      "00:06:10 [DEBUG] train episode 5324: reward = 162.00, steps = 162\n",
      "00:06:10 [DEBUG] train episode 5325: reward = 114.00, steps = 114\n",
      "00:06:11 [DEBUG] train episode 5326: reward = 131.00, steps = 131\n",
      "00:06:11 [DEBUG] train episode 5327: reward = 182.00, steps = 182\n",
      "00:06:11 [DEBUG] train episode 5328: reward = 146.00, steps = 146\n",
      "00:06:11 [DEBUG] train episode 5329: reward = 164.00, steps = 164\n",
      "00:06:11 [DEBUG] train episode 5330: reward = 131.00, steps = 131\n",
      "00:06:11 [DEBUG] train episode 5331: reward = 184.00, steps = 184\n",
      "00:06:11 [DEBUG] train episode 5332: reward = 115.00, steps = 115\n",
      "00:06:11 [DEBUG] train episode 5333: reward = 154.00, steps = 154\n",
      "00:06:11 [DEBUG] train episode 5334: reward = 114.00, steps = 114\n",
      "00:06:11 [DEBUG] train episode 5335: reward = 126.00, steps = 126\n",
      "00:06:11 [DEBUG] train episode 5336: reward = 139.00, steps = 139\n",
      "00:06:11 [DEBUG] train episode 5337: reward = 168.00, steps = 168\n",
      "00:06:11 [DEBUG] train episode 5338: reward = 117.00, steps = 117\n",
      "00:06:11 [DEBUG] train episode 5339: reward = 160.00, steps = 160\n",
      "00:06:11 [DEBUG] train episode 5340: reward = 160.00, steps = 160\n",
      "00:06:11 [DEBUG] train episode 5341: reward = 135.00, steps = 135\n",
      "00:06:11 [DEBUG] train episode 5342: reward = 150.00, steps = 150\n",
      "00:06:11 [DEBUG] train episode 5343: reward = 184.00, steps = 184\n",
      "00:06:11 [DEBUG] train episode 5344: reward = 168.00, steps = 168\n",
      "00:06:11 [DEBUG] train episode 5345: reward = 200.00, steps = 200\n",
      "00:06:11 [DEBUG] train episode 5346: reward = 184.00, steps = 184\n",
      "00:06:11 [DEBUG] train episode 5347: reward = 200.00, steps = 200\n",
      "00:06:12 [DEBUG] train episode 5348: reward = 185.00, steps = 185\n",
      "00:06:12 [DEBUG] train episode 5349: reward = 101.00, steps = 101\n",
      "00:06:12 [DEBUG] train episode 5350: reward = 200.00, steps = 200\n",
      "00:06:12 [DEBUG] train episode 5351: reward = 132.00, steps = 132\n",
      "00:06:12 [DEBUG] train episode 5352: reward = 160.00, steps = 160\n",
      "00:06:12 [DEBUG] train episode 5353: reward = 133.00, steps = 133\n",
      "00:06:12 [DEBUG] train episode 5354: reward = 200.00, steps = 200\n",
      "00:06:12 [DEBUG] train episode 5355: reward = 128.00, steps = 128\n",
      "00:06:12 [DEBUG] train episode 5356: reward = 166.00, steps = 166\n",
      "00:06:12 [DEBUG] train episode 5357: reward = 196.00, steps = 196\n",
      "00:06:12 [DEBUG] train episode 5358: reward = 200.00, steps = 200\n",
      "00:06:12 [DEBUG] train episode 5359: reward = 153.00, steps = 153\n",
      "00:06:12 [DEBUG] train episode 5360: reward = 200.00, steps = 200\n",
      "00:06:12 [DEBUG] train episode 5361: reward = 169.00, steps = 169\n",
      "00:06:12 [DEBUG] train episode 5362: reward = 119.00, steps = 119\n",
      "00:06:12 [DEBUG] train episode 5363: reward = 183.00, steps = 183\n",
      "00:06:12 [DEBUG] train episode 5364: reward = 155.00, steps = 155\n",
      "00:06:12 [DEBUG] train episode 5365: reward = 136.00, steps = 136\n",
      "00:06:12 [DEBUG] train episode 5366: reward = 155.00, steps = 155\n",
      "00:06:12 [DEBUG] train episode 5367: reward = 166.00, steps = 166\n",
      "00:06:12 [DEBUG] train episode 5368: reward = 99.00, steps = 99\n",
      "00:06:12 [DEBUG] train episode 5369: reward = 144.00, steps = 144\n",
      "00:06:13 [DEBUG] train episode 5370: reward = 187.00, steps = 187\n",
      "00:06:13 [DEBUG] train episode 5371: reward = 164.00, steps = 164\n",
      "00:06:13 [DEBUG] train episode 5372: reward = 124.00, steps = 124\n",
      "00:06:13 [DEBUG] train episode 5373: reward = 177.00, steps = 177\n",
      "00:06:13 [DEBUG] train episode 5374: reward = 145.00, steps = 145\n",
      "00:06:13 [DEBUG] train episode 5375: reward = 200.00, steps = 200\n",
      "00:06:13 [DEBUG] train episode 5376: reward = 197.00, steps = 197\n",
      "00:06:13 [DEBUG] train episode 5377: reward = 142.00, steps = 142\n",
      "00:06:13 [DEBUG] train episode 5378: reward = 187.00, steps = 187\n",
      "00:06:13 [DEBUG] train episode 5379: reward = 159.00, steps = 159\n",
      "00:06:13 [DEBUG] train episode 5380: reward = 185.00, steps = 185\n",
      "00:06:13 [DEBUG] train episode 5381: reward = 178.00, steps = 178\n",
      "00:06:13 [DEBUG] train episode 5382: reward = 163.00, steps = 163\n",
      "00:06:13 [DEBUG] train episode 5383: reward = 181.00, steps = 181\n",
      "00:06:13 [DEBUG] train episode 5384: reward = 178.00, steps = 178\n",
      "00:06:13 [DEBUG] train episode 5385: reward = 200.00, steps = 200\n",
      "00:06:13 [DEBUG] train episode 5386: reward = 126.00, steps = 126\n",
      "00:06:13 [DEBUG] train episode 5387: reward = 159.00, steps = 159\n",
      "00:06:13 [DEBUG] train episode 5388: reward = 200.00, steps = 200\n",
      "00:06:13 [DEBUG] train episode 5389: reward = 153.00, steps = 153\n",
      "00:06:14 [DEBUG] train episode 5390: reward = 174.00, steps = 174\n",
      "00:06:14 [DEBUG] train episode 5391: reward = 153.00, steps = 153\n",
      "00:06:14 [DEBUG] train episode 5392: reward = 177.00, steps = 177\n",
      "00:06:14 [DEBUG] train episode 5393: reward = 185.00, steps = 185\n",
      "00:06:14 [DEBUG] train episode 5394: reward = 178.00, steps = 178\n",
      "00:06:14 [DEBUG] train episode 5395: reward = 137.00, steps = 137\n",
      "00:06:14 [DEBUG] train episode 5396: reward = 184.00, steps = 184\n",
      "00:06:14 [DEBUG] train episode 5397: reward = 139.00, steps = 139\n",
      "00:06:14 [DEBUG] train episode 5398: reward = 200.00, steps = 200\n",
      "00:06:14 [DEBUG] train episode 5399: reward = 112.00, steps = 112\n",
      "00:06:14 [DEBUG] train episode 5400: reward = 162.00, steps = 162\n",
      "00:06:14 [DEBUG] train episode 5401: reward = 167.00, steps = 167\n",
      "00:06:14 [DEBUG] train episode 5402: reward = 188.00, steps = 188\n",
      "00:06:14 [DEBUG] train episode 5403: reward = 170.00, steps = 170\n",
      "00:06:14 [DEBUG] train episode 5404: reward = 175.00, steps = 175\n",
      "00:06:14 [DEBUG] train episode 5405: reward = 173.00, steps = 173\n",
      "00:06:14 [DEBUG] train episode 5406: reward = 200.00, steps = 200\n",
      "00:06:14 [DEBUG] train episode 5407: reward = 120.00, steps = 120\n",
      "00:06:14 [DEBUG] train episode 5408: reward = 200.00, steps = 200\n",
      "00:06:14 [DEBUG] train episode 5409: reward = 111.00, steps = 111\n",
      "00:06:15 [DEBUG] train episode 5410: reward = 178.00, steps = 178\n",
      "00:06:15 [DEBUG] train episode 5411: reward = 168.00, steps = 168\n",
      "00:06:15 [DEBUG] train episode 5412: reward = 147.00, steps = 147\n",
      "00:06:15 [DEBUG] train episode 5413: reward = 200.00, steps = 200\n",
      "00:06:15 [DEBUG] train episode 5414: reward = 177.00, steps = 177\n",
      "00:06:15 [DEBUG] train episode 5415: reward = 135.00, steps = 135\n",
      "00:06:15 [DEBUG] train episode 5416: reward = 136.00, steps = 136\n",
      "00:06:15 [DEBUG] train episode 5417: reward = 200.00, steps = 200\n",
      "00:06:15 [DEBUG] train episode 5418: reward = 164.00, steps = 164\n",
      "00:06:15 [DEBUG] train episode 5419: reward = 141.00, steps = 141\n",
      "00:06:15 [DEBUG] train episode 5420: reward = 200.00, steps = 200\n",
      "00:06:15 [DEBUG] train episode 5421: reward = 191.00, steps = 191\n",
      "00:06:15 [DEBUG] train episode 5422: reward = 175.00, steps = 175\n",
      "00:06:15 [DEBUG] train episode 5423: reward = 171.00, steps = 171\n",
      "00:06:15 [DEBUG] train episode 5424: reward = 159.00, steps = 159\n",
      "00:06:15 [DEBUG] train episode 5425: reward = 168.00, steps = 168\n",
      "00:06:15 [DEBUG] train episode 5426: reward = 134.00, steps = 134\n",
      "00:06:15 [DEBUG] train episode 5427: reward = 156.00, steps = 156\n",
      "00:06:15 [DEBUG] train episode 5428: reward = 164.00, steps = 164\n",
      "00:06:15 [DEBUG] train episode 5429: reward = 184.00, steps = 184\n",
      "00:06:16 [DEBUG] train episode 5430: reward = 199.00, steps = 199\n",
      "00:06:16 [DEBUG] train episode 5431: reward = 146.00, steps = 146\n",
      "00:06:16 [DEBUG] train episode 5432: reward = 200.00, steps = 200\n",
      "00:06:16 [DEBUG] train episode 5433: reward = 200.00, steps = 200\n",
      "00:06:16 [DEBUG] train episode 5434: reward = 149.00, steps = 149\n",
      "00:06:16 [DEBUG] train episode 5435: reward = 188.00, steps = 188\n",
      "00:06:16 [DEBUG] train episode 5436: reward = 200.00, steps = 200\n",
      "00:06:16 [DEBUG] train episode 5437: reward = 141.00, steps = 141\n",
      "00:06:16 [DEBUG] train episode 5438: reward = 146.00, steps = 146\n",
      "00:06:16 [DEBUG] train episode 5439: reward = 150.00, steps = 150\n",
      "00:06:16 [DEBUG] train episode 5440: reward = 161.00, steps = 161\n",
      "00:06:16 [DEBUG] train episode 5441: reward = 150.00, steps = 150\n",
      "00:06:16 [DEBUG] train episode 5442: reward = 155.00, steps = 155\n",
      "00:06:16 [DEBUG] train episode 5443: reward = 187.00, steps = 187\n",
      "00:06:16 [DEBUG] train episode 5444: reward = 200.00, steps = 200\n",
      "00:06:16 [DEBUG] train episode 5445: reward = 200.00, steps = 200\n",
      "00:06:16 [DEBUG] train episode 5446: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5447: reward = 152.00, steps = 152\n",
      "00:06:17 [DEBUG] train episode 5448: reward = 150.00, steps = 150\n",
      "00:06:17 [DEBUG] train episode 5449: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5450: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5451: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5452: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5453: reward = 157.00, steps = 157\n",
      "00:06:17 [DEBUG] train episode 5454: reward = 174.00, steps = 174\n",
      "00:06:17 [DEBUG] train episode 5455: reward = 133.00, steps = 133\n",
      "00:06:17 [DEBUG] train episode 5456: reward = 149.00, steps = 149\n",
      "00:06:17 [DEBUG] train episode 5457: reward = 157.00, steps = 157\n",
      "00:06:17 [DEBUG] train episode 5458: reward = 180.00, steps = 180\n",
      "00:06:17 [DEBUG] train episode 5459: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5460: reward = 165.00, steps = 165\n",
      "00:06:17 [DEBUG] train episode 5461: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5462: reward = 200.00, steps = 200\n",
      "00:06:17 [DEBUG] train episode 5463: reward = 172.00, steps = 172\n",
      "00:06:17 [DEBUG] train episode 5464: reward = 164.00, steps = 164\n",
      "00:06:17 [DEBUG] train episode 5465: reward = 175.00, steps = 175\n",
      "00:06:18 [DEBUG] train episode 5466: reward = 162.00, steps = 162\n",
      "00:06:18 [DEBUG] train episode 5467: reward = 200.00, steps = 200\n",
      "00:06:18 [DEBUG] train episode 5468: reward = 138.00, steps = 138\n",
      "00:06:18 [DEBUG] train episode 5469: reward = 188.00, steps = 188\n",
      "00:06:18 [DEBUG] train episode 5470: reward = 200.00, steps = 200\n",
      "00:06:18 [DEBUG] train episode 5471: reward = 163.00, steps = 163\n",
      "00:06:18 [DEBUG] train episode 5472: reward = 200.00, steps = 200\n",
      "00:06:18 [DEBUG] train episode 5473: reward = 199.00, steps = 199\n",
      "00:06:18 [DEBUG] train episode 5474: reward = 200.00, steps = 200\n",
      "00:06:18 [DEBUG] train episode 5475: reward = 156.00, steps = 156\n",
      "00:06:18 [DEBUG] train episode 5476: reward = 200.00, steps = 200\n",
      "00:06:18 [DEBUG] train episode 5477: reward = 131.00, steps = 131\n",
      "00:06:18 [DEBUG] train episode 5478: reward = 200.00, steps = 200\n",
      "00:06:18 [DEBUG] train episode 5479: reward = 178.00, steps = 178\n",
      "00:06:18 [DEBUG] train episode 5480: reward = 150.00, steps = 150\n",
      "00:06:18 [DEBUG] train episode 5481: reward = 149.00, steps = 149\n",
      "00:06:18 [DEBUG] train episode 5482: reward = 143.00, steps = 143\n",
      "00:06:18 [DEBUG] train episode 5483: reward = 200.00, steps = 200\n",
      "00:06:18 [DEBUG] train episode 5484: reward = 142.00, steps = 142\n",
      "00:06:19 [DEBUG] train episode 5485: reward = 178.00, steps = 178\n",
      "00:06:19 [DEBUG] train episode 5486: reward = 142.00, steps = 142\n",
      "00:06:19 [DEBUG] train episode 5487: reward = 159.00, steps = 159\n",
      "00:06:19 [DEBUG] train episode 5488: reward = 139.00, steps = 139\n",
      "00:06:19 [DEBUG] train episode 5489: reward = 155.00, steps = 155\n",
      "00:06:19 [DEBUG] train episode 5490: reward = 134.00, steps = 134\n",
      "00:06:19 [DEBUG] train episode 5491: reward = 129.00, steps = 129\n",
      "00:06:19 [DEBUG] train episode 5492: reward = 124.00, steps = 124\n",
      "00:06:19 [DEBUG] train episode 5493: reward = 200.00, steps = 200\n",
      "00:06:19 [DEBUG] train episode 5494: reward = 127.00, steps = 127\n",
      "00:06:19 [DEBUG] train episode 5495: reward = 186.00, steps = 186\n",
      "00:06:19 [DEBUG] train episode 5496: reward = 200.00, steps = 200\n",
      "00:06:19 [DEBUG] train episode 5497: reward = 156.00, steps = 156\n",
      "00:06:19 [DEBUG] train episode 5498: reward = 181.00, steps = 181\n",
      "00:06:19 [DEBUG] train episode 5499: reward = 136.00, steps = 136\n",
      "00:06:19 [DEBUG] train episode 5500: reward = 126.00, steps = 126\n",
      "00:06:19 [DEBUG] train episode 5501: reward = 200.00, steps = 200\n",
      "00:06:19 [DEBUG] train episode 5502: reward = 145.00, steps = 145\n",
      "00:06:19 [DEBUG] train episode 5503: reward = 151.00, steps = 151\n",
      "00:06:19 [DEBUG] train episode 5504: reward = 126.00, steps = 126\n",
      "00:06:19 [DEBUG] train episode 5505: reward = 143.00, steps = 143\n",
      "00:06:19 [DEBUG] train episode 5506: reward = 132.00, steps = 132\n",
      "00:06:20 [DEBUG] train episode 5507: reward = 136.00, steps = 136\n",
      "00:06:20 [DEBUG] train episode 5508: reward = 145.00, steps = 145\n",
      "00:06:20 [DEBUG] train episode 5509: reward = 200.00, steps = 200\n",
      "00:06:20 [DEBUG] train episode 5510: reward = 151.00, steps = 151\n",
      "00:06:20 [DEBUG] train episode 5511: reward = 184.00, steps = 184\n",
      "00:06:20 [DEBUG] train episode 5512: reward = 161.00, steps = 161\n",
      "00:06:20 [DEBUG] train episode 5513: reward = 185.00, steps = 185\n",
      "00:06:20 [DEBUG] train episode 5514: reward = 149.00, steps = 149\n",
      "00:06:20 [DEBUG] train episode 5515: reward = 179.00, steps = 179\n",
      "00:06:20 [DEBUG] train episode 5516: reward = 143.00, steps = 143\n",
      "00:06:20 [DEBUG] train episode 5517: reward = 129.00, steps = 129\n",
      "00:06:20 [DEBUG] train episode 5518: reward = 161.00, steps = 161\n",
      "00:06:20 [DEBUG] train episode 5519: reward = 139.00, steps = 139\n",
      "00:06:20 [DEBUG] train episode 5520: reward = 152.00, steps = 152\n",
      "00:06:20 [DEBUG] train episode 5521: reward = 126.00, steps = 126\n",
      "00:06:20 [DEBUG] train episode 5522: reward = 177.00, steps = 177\n",
      "00:06:20 [DEBUG] train episode 5523: reward = 101.00, steps = 101\n",
      "00:06:20 [DEBUG] train episode 5524: reward = 162.00, steps = 162\n",
      "00:06:20 [DEBUG] train episode 5525: reward = 171.00, steps = 171\n",
      "00:06:20 [DEBUG] train episode 5526: reward = 170.00, steps = 170\n",
      "00:06:20 [DEBUG] train episode 5527: reward = 164.00, steps = 164\n",
      "00:06:21 [DEBUG] train episode 5528: reward = 187.00, steps = 187\n",
      "00:06:21 [DEBUG] train episode 5529: reward = 117.00, steps = 117\n",
      "00:06:21 [DEBUG] train episode 5530: reward = 115.00, steps = 115\n",
      "00:06:21 [DEBUG] train episode 5531: reward = 145.00, steps = 145\n",
      "00:06:21 [DEBUG] train episode 5532: reward = 130.00, steps = 130\n",
      "00:06:21 [DEBUG] train episode 5533: reward = 144.00, steps = 144\n",
      "00:06:21 [DEBUG] train episode 5534: reward = 172.00, steps = 172\n",
      "00:06:21 [DEBUG] train episode 5535: reward = 200.00, steps = 200\n",
      "00:06:21 [DEBUG] train episode 5536: reward = 174.00, steps = 174\n",
      "00:06:21 [DEBUG] train episode 5537: reward = 145.00, steps = 145\n",
      "00:06:21 [DEBUG] train episode 5538: reward = 110.00, steps = 110\n",
      "00:06:21 [DEBUG] train episode 5539: reward = 105.00, steps = 105\n",
      "00:06:21 [DEBUG] train episode 5540: reward = 145.00, steps = 145\n",
      "00:06:21 [DEBUG] train episode 5541: reward = 135.00, steps = 135\n",
      "00:06:21 [DEBUG] train episode 5542: reward = 200.00, steps = 200\n",
      "00:06:21 [DEBUG] train episode 5543: reward = 143.00, steps = 143\n",
      "00:06:21 [DEBUG] train episode 5544: reward = 96.00, steps = 96\n",
      "00:06:21 [DEBUG] train episode 5545: reward = 100.00, steps = 100\n",
      "00:06:21 [DEBUG] train episode 5546: reward = 175.00, steps = 175\n",
      "00:06:21 [DEBUG] train episode 5547: reward = 107.00, steps = 107\n",
      "00:06:21 [DEBUG] train episode 5548: reward = 168.00, steps = 168\n",
      "00:06:22 [DEBUG] train episode 5549: reward = 200.00, steps = 200\n",
      "00:06:22 [DEBUG] train episode 5550: reward = 109.00, steps = 109\n",
      "00:06:22 [DEBUG] train episode 5551: reward = 138.00, steps = 138\n",
      "00:06:22 [DEBUG] train episode 5552: reward = 119.00, steps = 119\n",
      "00:06:22 [DEBUG] train episode 5553: reward = 171.00, steps = 171\n",
      "00:06:22 [DEBUG] train episode 5554: reward = 106.00, steps = 106\n",
      "00:06:22 [DEBUG] train episode 5555: reward = 125.00, steps = 125\n",
      "00:06:22 [DEBUG] train episode 5556: reward = 196.00, steps = 196\n",
      "00:06:22 [DEBUG] train episode 5557: reward = 148.00, steps = 148\n",
      "00:06:22 [DEBUG] train episode 5558: reward = 95.00, steps = 95\n",
      "00:06:22 [DEBUG] train episode 5559: reward = 122.00, steps = 122\n",
      "00:06:22 [DEBUG] train episode 5560: reward = 147.00, steps = 147\n",
      "00:06:22 [DEBUG] train episode 5561: reward = 89.00, steps = 89\n",
      "00:06:22 [DEBUG] train episode 5562: reward = 143.00, steps = 143\n",
      "00:06:22 [DEBUG] train episode 5563: reward = 93.00, steps = 93\n",
      "00:06:22 [DEBUG] train episode 5564: reward = 172.00, steps = 172\n",
      "00:06:22 [DEBUG] train episode 5565: reward = 95.00, steps = 95\n",
      "00:06:22 [DEBUG] train episode 5566: reward = 115.00, steps = 115\n",
      "00:06:22 [DEBUG] train episode 5567: reward = 98.00, steps = 98\n",
      "00:06:22 [DEBUG] train episode 5568: reward = 110.00, steps = 110\n",
      "00:06:22 [DEBUG] train episode 5569: reward = 116.00, steps = 116\n",
      "00:06:22 [DEBUG] train episode 5570: reward = 125.00, steps = 125\n",
      "00:06:22 [DEBUG] train episode 5571: reward = 173.00, steps = 173\n",
      "00:06:22 [DEBUG] train episode 5572: reward = 121.00, steps = 121\n",
      "00:06:22 [DEBUG] train episode 5573: reward = 121.00, steps = 121\n",
      "00:06:23 [DEBUG] train episode 5574: reward = 200.00, steps = 200\n",
      "00:06:23 [DEBUG] train episode 5575: reward = 122.00, steps = 122\n",
      "00:06:23 [DEBUG] train episode 5576: reward = 109.00, steps = 109\n",
      "00:06:23 [DEBUG] train episode 5577: reward = 148.00, steps = 148\n",
      "00:06:23 [DEBUG] train episode 5578: reward = 137.00, steps = 137\n",
      "00:06:23 [DEBUG] train episode 5579: reward = 160.00, steps = 160\n",
      "00:06:23 [DEBUG] train episode 5580: reward = 101.00, steps = 101\n",
      "00:06:23 [DEBUG] train episode 5581: reward = 122.00, steps = 122\n",
      "00:06:23 [DEBUG] train episode 5582: reward = 90.00, steps = 90\n",
      "00:06:23 [DEBUG] train episode 5583: reward = 143.00, steps = 143\n",
      "00:06:23 [DEBUG] train episode 5584: reward = 104.00, steps = 104\n",
      "00:06:23 [DEBUG] train episode 5585: reward = 125.00, steps = 125\n",
      "00:06:23 [DEBUG] train episode 5586: reward = 141.00, steps = 141\n",
      "00:06:23 [DEBUG] train episode 5587: reward = 108.00, steps = 108\n",
      "00:06:23 [DEBUG] train episode 5588: reward = 126.00, steps = 126\n",
      "00:06:23 [DEBUG] train episode 5589: reward = 117.00, steps = 117\n",
      "00:06:23 [DEBUG] train episode 5590: reward = 172.00, steps = 172\n",
      "00:06:23 [DEBUG] train episode 5591: reward = 112.00, steps = 112\n",
      "00:06:23 [DEBUG] train episode 5592: reward = 89.00, steps = 89\n",
      "00:06:23 [DEBUG] train episode 5593: reward = 93.00, steps = 93\n",
      "00:06:23 [DEBUG] train episode 5594: reward = 99.00, steps = 99\n",
      "00:06:23 [DEBUG] train episode 5595: reward = 99.00, steps = 99\n",
      "00:06:23 [DEBUG] train episode 5596: reward = 82.00, steps = 82\n",
      "00:06:23 [DEBUG] train episode 5597: reward = 109.00, steps = 109\n",
      "00:06:23 [DEBUG] train episode 5598: reward = 110.00, steps = 110\n",
      "00:06:24 [DEBUG] train episode 5599: reward = 150.00, steps = 150\n",
      "00:06:24 [DEBUG] train episode 5600: reward = 149.00, steps = 149\n",
      "00:06:24 [DEBUG] train episode 5601: reward = 200.00, steps = 200\n",
      "00:06:24 [DEBUG] train episode 5602: reward = 145.00, steps = 145\n",
      "00:06:24 [DEBUG] train episode 5603: reward = 110.00, steps = 110\n",
      "00:06:24 [DEBUG] train episode 5604: reward = 108.00, steps = 108\n",
      "00:06:24 [DEBUG] train episode 5605: reward = 94.00, steps = 94\n",
      "00:06:24 [DEBUG] train episode 5606: reward = 153.00, steps = 153\n",
      "00:06:24 [DEBUG] train episode 5607: reward = 89.00, steps = 89\n",
      "00:06:24 [DEBUG] train episode 5608: reward = 106.00, steps = 106\n",
      "00:06:24 [DEBUG] train episode 5609: reward = 179.00, steps = 179\n",
      "00:06:24 [DEBUG] train episode 5610: reward = 124.00, steps = 124\n",
      "00:06:24 [DEBUG] train episode 5611: reward = 92.00, steps = 92\n",
      "00:06:24 [DEBUG] train episode 5612: reward = 170.00, steps = 170\n",
      "00:06:24 [DEBUG] train episode 5613: reward = 119.00, steps = 119\n",
      "00:06:24 [DEBUG] train episode 5614: reward = 102.00, steps = 102\n",
      "00:06:24 [DEBUG] train episode 5615: reward = 163.00, steps = 163\n",
      "00:06:24 [DEBUG] train episode 5616: reward = 118.00, steps = 118\n",
      "00:06:24 [DEBUG] train episode 5617: reward = 200.00, steps = 200\n",
      "00:06:24 [DEBUG] train episode 5618: reward = 193.00, steps = 193\n",
      "00:06:24 [DEBUG] train episode 5619: reward = 144.00, steps = 144\n",
      "00:06:24 [DEBUG] train episode 5620: reward = 196.00, steps = 196\n",
      "00:06:24 [DEBUG] train episode 5621: reward = 200.00, steps = 200\n",
      "00:06:25 [DEBUG] train episode 5622: reward = 143.00, steps = 143\n",
      "00:06:25 [DEBUG] train episode 5623: reward = 99.00, steps = 99\n",
      "00:06:25 [DEBUG] train episode 5624: reward = 91.00, steps = 91\n",
      "00:06:25 [DEBUG] train episode 5625: reward = 106.00, steps = 106\n",
      "00:06:25 [DEBUG] train episode 5626: reward = 156.00, steps = 156\n",
      "00:06:25 [DEBUG] train episode 5627: reward = 127.00, steps = 127\n",
      "00:06:25 [DEBUG] train episode 5628: reward = 128.00, steps = 128\n",
      "00:06:25 [DEBUG] train episode 5629: reward = 163.00, steps = 163\n",
      "00:06:25 [DEBUG] train episode 5630: reward = 100.00, steps = 100\n",
      "00:06:25 [DEBUG] train episode 5631: reward = 172.00, steps = 172\n",
      "00:06:25 [DEBUG] train episode 5632: reward = 89.00, steps = 89\n",
      "00:06:25 [DEBUG] train episode 5633: reward = 100.00, steps = 100\n",
      "00:06:25 [DEBUG] train episode 5634: reward = 200.00, steps = 200\n",
      "00:06:25 [DEBUG] train episode 5635: reward = 96.00, steps = 96\n",
      "00:06:25 [DEBUG] train episode 5636: reward = 151.00, steps = 151\n",
      "00:06:25 [DEBUG] train episode 5637: reward = 119.00, steps = 119\n",
      "00:06:25 [DEBUG] train episode 5638: reward = 128.00, steps = 128\n",
      "00:06:25 [DEBUG] train episode 5639: reward = 142.00, steps = 142\n",
      "00:06:25 [DEBUG] train episode 5640: reward = 154.00, steps = 154\n",
      "00:06:25 [DEBUG] train episode 5641: reward = 116.00, steps = 116\n",
      "00:06:25 [DEBUG] train episode 5642: reward = 132.00, steps = 132\n",
      "00:06:25 [DEBUG] train episode 5643: reward = 157.00, steps = 157\n",
      "00:06:25 [DEBUG] train episode 5644: reward = 200.00, steps = 200\n",
      "00:06:25 [DEBUG] train episode 5645: reward = 114.00, steps = 114\n",
      "00:06:26 [DEBUG] train episode 5646: reward = 122.00, steps = 122\n",
      "00:06:26 [DEBUG] train episode 5647: reward = 160.00, steps = 160\n",
      "00:06:26 [DEBUG] train episode 5648: reward = 183.00, steps = 183\n",
      "00:06:26 [DEBUG] train episode 5649: reward = 137.00, steps = 137\n",
      "00:06:26 [DEBUG] train episode 5650: reward = 154.00, steps = 154\n",
      "00:06:26 [DEBUG] train episode 5651: reward = 188.00, steps = 188\n",
      "00:06:26 [DEBUG] train episode 5652: reward = 133.00, steps = 133\n",
      "00:06:26 [DEBUG] train episode 5653: reward = 200.00, steps = 200\n",
      "00:06:26 [DEBUG] train episode 5654: reward = 180.00, steps = 180\n",
      "00:06:26 [DEBUG] train episode 5655: reward = 101.00, steps = 101\n",
      "00:06:26 [DEBUG] train episode 5656: reward = 97.00, steps = 97\n",
      "00:06:26 [DEBUG] train episode 5657: reward = 159.00, steps = 159\n",
      "00:06:26 [DEBUG] train episode 5658: reward = 100.00, steps = 100\n",
      "00:06:26 [DEBUG] train episode 5659: reward = 121.00, steps = 121\n",
      "00:06:26 [DEBUG] train episode 5660: reward = 176.00, steps = 176\n",
      "00:06:26 [DEBUG] train episode 5661: reward = 113.00, steps = 113\n",
      "00:06:26 [DEBUG] train episode 5662: reward = 117.00, steps = 117\n",
      "00:06:26 [DEBUG] train episode 5663: reward = 146.00, steps = 146\n",
      "00:06:26 [DEBUG] train episode 5664: reward = 181.00, steps = 181\n",
      "00:06:26 [DEBUG] train episode 5665: reward = 118.00, steps = 118\n",
      "00:06:26 [DEBUG] train episode 5666: reward = 133.00, steps = 133\n",
      "00:06:26 [DEBUG] train episode 5667: reward = 134.00, steps = 134\n",
      "00:06:27 [DEBUG] train episode 5668: reward = 123.00, steps = 123\n",
      "00:06:27 [DEBUG] train episode 5669: reward = 97.00, steps = 97\n",
      "00:06:27 [DEBUG] train episode 5670: reward = 126.00, steps = 126\n",
      "00:06:27 [DEBUG] train episode 5671: reward = 200.00, steps = 200\n",
      "00:06:27 [DEBUG] train episode 5672: reward = 122.00, steps = 122\n",
      "00:06:27 [DEBUG] train episode 5673: reward = 127.00, steps = 127\n",
      "00:06:27 [DEBUG] train episode 5674: reward = 131.00, steps = 131\n",
      "00:06:27 [DEBUG] train episode 5675: reward = 134.00, steps = 134\n",
      "00:06:27 [DEBUG] train episode 5676: reward = 168.00, steps = 168\n",
      "00:06:27 [DEBUG] train episode 5677: reward = 157.00, steps = 157\n",
      "00:06:27 [DEBUG] train episode 5678: reward = 129.00, steps = 129\n",
      "00:06:27 [DEBUG] train episode 5679: reward = 200.00, steps = 200\n",
      "00:06:27 [DEBUG] train episode 5680: reward = 186.00, steps = 186\n",
      "00:06:27 [DEBUG] train episode 5681: reward = 98.00, steps = 98\n",
      "00:06:27 [DEBUG] train episode 5682: reward = 114.00, steps = 114\n",
      "00:06:27 [DEBUG] train episode 5683: reward = 119.00, steps = 119\n",
      "00:06:27 [DEBUG] train episode 5684: reward = 190.00, steps = 190\n",
      "00:06:27 [DEBUG] train episode 5685: reward = 162.00, steps = 162\n",
      "00:06:27 [DEBUG] train episode 5686: reward = 109.00, steps = 109\n",
      "00:06:27 [DEBUG] train episode 5687: reward = 127.00, steps = 127\n",
      "00:06:27 [DEBUG] train episode 5688: reward = 132.00, steps = 132\n",
      "00:06:27 [DEBUG] train episode 5689: reward = 149.00, steps = 149\n",
      "00:06:28 [DEBUG] train episode 5690: reward = 140.00, steps = 140\n",
      "00:06:28 [DEBUG] train episode 5691: reward = 124.00, steps = 124\n",
      "00:06:28 [DEBUG] train episode 5692: reward = 155.00, steps = 155\n",
      "00:06:28 [DEBUG] train episode 5693: reward = 143.00, steps = 143\n",
      "00:06:28 [DEBUG] train episode 5694: reward = 150.00, steps = 150\n",
      "00:06:28 [DEBUG] train episode 5695: reward = 200.00, steps = 200\n",
      "00:06:28 [DEBUG] train episode 5696: reward = 105.00, steps = 105\n",
      "00:06:28 [DEBUG] train episode 5697: reward = 124.00, steps = 124\n",
      "00:06:28 [DEBUG] train episode 5698: reward = 200.00, steps = 200\n",
      "00:06:28 [DEBUG] train episode 5699: reward = 119.00, steps = 119\n",
      "00:06:28 [DEBUG] train episode 5700: reward = 110.00, steps = 110\n",
      "00:06:28 [DEBUG] train episode 5701: reward = 156.00, steps = 156\n",
      "00:06:28 [DEBUG] train episode 5702: reward = 177.00, steps = 177\n",
      "00:06:28 [DEBUG] train episode 5703: reward = 159.00, steps = 159\n",
      "00:06:28 [DEBUG] train episode 5704: reward = 200.00, steps = 200\n",
      "00:06:28 [DEBUG] train episode 5705: reward = 115.00, steps = 115\n",
      "00:06:28 [DEBUG] train episode 5706: reward = 131.00, steps = 131\n",
      "00:06:28 [DEBUG] train episode 5707: reward = 133.00, steps = 133\n",
      "00:06:28 [DEBUG] train episode 5708: reward = 128.00, steps = 128\n",
      "00:06:28 [DEBUG] train episode 5709: reward = 200.00, steps = 200\n",
      "00:06:28 [DEBUG] train episode 5710: reward = 155.00, steps = 155\n",
      "00:06:29 [DEBUG] train episode 5711: reward = 151.00, steps = 151\n",
      "00:06:29 [DEBUG] train episode 5712: reward = 101.00, steps = 101\n",
      "00:06:29 [DEBUG] train episode 5713: reward = 125.00, steps = 125\n",
      "00:06:29 [DEBUG] train episode 5714: reward = 152.00, steps = 152\n",
      "00:06:29 [DEBUG] train episode 5715: reward = 128.00, steps = 128\n",
      "00:06:29 [DEBUG] train episode 5716: reward = 117.00, steps = 117\n",
      "00:06:29 [DEBUG] train episode 5717: reward = 112.00, steps = 112\n",
      "00:06:29 [DEBUG] train episode 5718: reward = 150.00, steps = 150\n",
      "00:06:29 [DEBUG] train episode 5719: reward = 200.00, steps = 200\n",
      "00:06:29 [DEBUG] train episode 5720: reward = 104.00, steps = 104\n",
      "00:06:29 [DEBUG] train episode 5721: reward = 150.00, steps = 150\n",
      "00:06:29 [DEBUG] train episode 5722: reward = 137.00, steps = 137\n",
      "00:06:29 [DEBUG] train episode 5723: reward = 185.00, steps = 185\n",
      "00:06:29 [DEBUG] train episode 5724: reward = 194.00, steps = 194\n",
      "00:06:29 [DEBUG] train episode 5725: reward = 107.00, steps = 107\n",
      "00:06:29 [DEBUG] train episode 5726: reward = 173.00, steps = 173\n",
      "00:06:29 [DEBUG] train episode 5727: reward = 178.00, steps = 178\n",
      "00:06:29 [DEBUG] train episode 5728: reward = 192.00, steps = 192\n",
      "00:06:29 [DEBUG] train episode 5729: reward = 163.00, steps = 163\n",
      "00:06:29 [DEBUG] train episode 5730: reward = 200.00, steps = 200\n",
      "00:06:29 [DEBUG] train episode 5731: reward = 113.00, steps = 113\n",
      "00:06:29 [DEBUG] train episode 5732: reward = 138.00, steps = 138\n",
      "00:06:30 [DEBUG] train episode 5733: reward = 164.00, steps = 164\n",
      "00:06:30 [DEBUG] train episode 5734: reward = 151.00, steps = 151\n",
      "00:06:30 [DEBUG] train episode 5735: reward = 150.00, steps = 150\n",
      "00:06:30 [DEBUG] train episode 5736: reward = 132.00, steps = 132\n",
      "00:06:30 [DEBUG] train episode 5737: reward = 146.00, steps = 146\n",
      "00:06:30 [DEBUG] train episode 5738: reward = 180.00, steps = 180\n",
      "00:06:30 [DEBUG] train episode 5739: reward = 151.00, steps = 151\n",
      "00:06:30 [DEBUG] train episode 5740: reward = 200.00, steps = 200\n",
      "00:06:30 [DEBUG] train episode 5741: reward = 196.00, steps = 196\n",
      "00:06:30 [DEBUG] train episode 5742: reward = 111.00, steps = 111\n",
      "00:06:30 [DEBUG] train episode 5743: reward = 172.00, steps = 172\n",
      "00:06:30 [DEBUG] train episode 5744: reward = 178.00, steps = 178\n",
      "00:06:30 [DEBUG] train episode 5745: reward = 158.00, steps = 158\n",
      "00:06:30 [DEBUG] train episode 5746: reward = 115.00, steps = 115\n",
      "00:06:30 [DEBUG] train episode 5747: reward = 138.00, steps = 138\n",
      "00:06:30 [DEBUG] train episode 5748: reward = 156.00, steps = 156\n",
      "00:06:30 [DEBUG] train episode 5749: reward = 200.00, steps = 200\n",
      "00:06:30 [DEBUG] train episode 5750: reward = 155.00, steps = 155\n",
      "00:06:30 [DEBUG] train episode 5751: reward = 135.00, steps = 135\n",
      "00:06:30 [DEBUG] train episode 5752: reward = 130.00, steps = 130\n",
      "00:06:31 [DEBUG] train episode 5753: reward = 160.00, steps = 160\n",
      "00:06:31 [DEBUG] train episode 5754: reward = 112.00, steps = 112\n",
      "00:06:31 [DEBUG] train episode 5755: reward = 158.00, steps = 158\n",
      "00:06:31 [DEBUG] train episode 5756: reward = 158.00, steps = 158\n",
      "00:06:31 [DEBUG] train episode 5757: reward = 180.00, steps = 180\n",
      "00:06:31 [DEBUG] train episode 5758: reward = 186.00, steps = 186\n",
      "00:06:31 [DEBUG] train episode 5759: reward = 200.00, steps = 200\n",
      "00:06:31 [DEBUG] train episode 5760: reward = 128.00, steps = 128\n",
      "00:06:31 [DEBUG] train episode 5761: reward = 200.00, steps = 200\n",
      "00:06:31 [DEBUG] train episode 5762: reward = 159.00, steps = 159\n",
      "00:06:31 [DEBUG] train episode 5763: reward = 135.00, steps = 135\n",
      "00:06:31 [DEBUG] train episode 5764: reward = 151.00, steps = 151\n",
      "00:06:31 [DEBUG] train episode 5765: reward = 151.00, steps = 151\n",
      "00:06:31 [DEBUG] train episode 5766: reward = 163.00, steps = 163\n",
      "00:06:31 [DEBUG] train episode 5767: reward = 142.00, steps = 142\n",
      "00:06:31 [DEBUG] train episode 5768: reward = 124.00, steps = 124\n",
      "00:06:31 [DEBUG] train episode 5769: reward = 188.00, steps = 188\n",
      "00:06:31 [DEBUG] train episode 5770: reward = 200.00, steps = 200\n",
      "00:06:31 [DEBUG] train episode 5771: reward = 132.00, steps = 132\n",
      "00:06:31 [DEBUG] train episode 5772: reward = 165.00, steps = 165\n",
      "00:06:31 [DEBUG] train episode 5773: reward = 139.00, steps = 139\n",
      "00:06:32 [DEBUG] train episode 5774: reward = 161.00, steps = 161\n",
      "00:06:32 [DEBUG] train episode 5775: reward = 200.00, steps = 200\n",
      "00:06:32 [DEBUG] train episode 5776: reward = 139.00, steps = 139\n",
      "00:06:32 [DEBUG] train episode 5777: reward = 200.00, steps = 200\n",
      "00:06:32 [DEBUG] train episode 5778: reward = 129.00, steps = 129\n",
      "00:06:32 [DEBUG] train episode 5779: reward = 164.00, steps = 164\n",
      "00:06:32 [DEBUG] train episode 5780: reward = 163.00, steps = 163\n",
      "00:06:32 [DEBUG] train episode 5781: reward = 135.00, steps = 135\n",
      "00:06:32 [DEBUG] train episode 5782: reward = 172.00, steps = 172\n",
      "00:06:32 [DEBUG] train episode 5783: reward = 113.00, steps = 113\n",
      "00:06:32 [DEBUG] train episode 5784: reward = 181.00, steps = 181\n",
      "00:06:32 [DEBUG] train episode 5785: reward = 176.00, steps = 176\n",
      "00:06:32 [DEBUG] train episode 5786: reward = 127.00, steps = 127\n",
      "00:06:32 [DEBUG] train episode 5787: reward = 164.00, steps = 164\n",
      "00:06:32 [DEBUG] train episode 5788: reward = 175.00, steps = 175\n",
      "00:06:32 [DEBUG] train episode 5789: reward = 200.00, steps = 200\n",
      "00:06:32 [DEBUG] train episode 5790: reward = 178.00, steps = 178\n",
      "00:06:32 [DEBUG] train episode 5791: reward = 112.00, steps = 112\n",
      "00:06:32 [DEBUG] train episode 5792: reward = 199.00, steps = 199\n",
      "00:06:32 [DEBUG] train episode 5793: reward = 137.00, steps = 137\n",
      "00:06:33 [DEBUG] train episode 5794: reward = 170.00, steps = 170\n",
      "00:06:33 [DEBUG] train episode 5795: reward = 141.00, steps = 141\n",
      "00:06:33 [DEBUG] train episode 5796: reward = 200.00, steps = 200\n",
      "00:06:33 [DEBUG] train episode 5797: reward = 144.00, steps = 144\n",
      "00:06:33 [DEBUG] train episode 5798: reward = 121.00, steps = 121\n",
      "00:06:33 [DEBUG] train episode 5799: reward = 185.00, steps = 185\n",
      "00:06:33 [DEBUG] train episode 5800: reward = 146.00, steps = 146\n",
      "00:06:33 [DEBUG] train episode 5801: reward = 200.00, steps = 200\n",
      "00:06:33 [DEBUG] train episode 5802: reward = 136.00, steps = 136\n",
      "00:06:33 [DEBUG] train episode 5803: reward = 149.00, steps = 149\n",
      "00:06:33 [DEBUG] train episode 5804: reward = 139.00, steps = 139\n",
      "00:06:33 [DEBUG] train episode 5805: reward = 200.00, steps = 200\n",
      "00:06:33 [DEBUG] train episode 5806: reward = 127.00, steps = 127\n",
      "00:06:33 [DEBUG] train episode 5807: reward = 177.00, steps = 177\n",
      "00:06:33 [DEBUG] train episode 5808: reward = 154.00, steps = 154\n",
      "00:06:33 [DEBUG] train episode 5809: reward = 112.00, steps = 112\n",
      "00:06:33 [DEBUG] train episode 5810: reward = 158.00, steps = 158\n",
      "00:06:33 [DEBUG] train episode 5811: reward = 145.00, steps = 145\n",
      "00:06:33 [DEBUG] train episode 5812: reward = 146.00, steps = 146\n",
      "00:06:33 [DEBUG] train episode 5813: reward = 152.00, steps = 152\n",
      "00:06:33 [DEBUG] train episode 5814: reward = 140.00, steps = 140\n",
      "00:06:34 [DEBUG] train episode 5815: reward = 181.00, steps = 181\n",
      "00:06:34 [DEBUG] train episode 5816: reward = 114.00, steps = 114\n",
      "00:06:34 [DEBUG] train episode 5817: reward = 152.00, steps = 152\n",
      "00:06:34 [DEBUG] train episode 5818: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5819: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5820: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5821: reward = 155.00, steps = 155\n",
      "00:06:34 [DEBUG] train episode 5822: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5823: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5824: reward = 111.00, steps = 111\n",
      "00:06:34 [DEBUG] train episode 5825: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5826: reward = 182.00, steps = 182\n",
      "00:06:34 [DEBUG] train episode 5827: reward = 169.00, steps = 169\n",
      "00:06:34 [DEBUG] train episode 5828: reward = 138.00, steps = 138\n",
      "00:06:34 [DEBUG] train episode 5829: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5830: reward = 200.00, steps = 200\n",
      "00:06:34 [DEBUG] train episode 5831: reward = 138.00, steps = 138\n",
      "00:06:34 [DEBUG] train episode 5832: reward = 129.00, steps = 129\n",
      "00:06:34 [DEBUG] train episode 5833: reward = 127.00, steps = 127\n",
      "00:06:35 [DEBUG] train episode 5834: reward = 120.00, steps = 120\n",
      "00:06:35 [DEBUG] train episode 5835: reward = 131.00, steps = 131\n",
      "00:06:35 [DEBUG] train episode 5836: reward = 113.00, steps = 113\n",
      "00:06:35 [DEBUG] train episode 5837: reward = 128.00, steps = 128\n",
      "00:06:35 [DEBUG] train episode 5838: reward = 132.00, steps = 132\n",
      "00:06:35 [DEBUG] train episode 5839: reward = 129.00, steps = 129\n",
      "00:06:35 [DEBUG] train episode 5840: reward = 100.00, steps = 100\n",
      "00:06:35 [DEBUG] train episode 5841: reward = 148.00, steps = 148\n",
      "00:06:35 [DEBUG] train episode 5842: reward = 200.00, steps = 200\n",
      "00:06:35 [DEBUG] train episode 5843: reward = 122.00, steps = 122\n",
      "00:06:35 [DEBUG] train episode 5844: reward = 136.00, steps = 136\n",
      "00:06:35 [DEBUG] train episode 5845: reward = 149.00, steps = 149\n",
      "00:06:35 [DEBUG] train episode 5846: reward = 184.00, steps = 184\n",
      "00:06:35 [DEBUG] train episode 5847: reward = 120.00, steps = 120\n",
      "00:06:35 [DEBUG] train episode 5848: reward = 136.00, steps = 136\n",
      "00:06:35 [DEBUG] train episode 5849: reward = 144.00, steps = 144\n",
      "00:06:35 [DEBUG] train episode 5850: reward = 149.00, steps = 149\n",
      "00:06:35 [DEBUG] train episode 5851: reward = 200.00, steps = 200\n",
      "00:06:35 [DEBUG] train episode 5852: reward = 136.00, steps = 136\n",
      "00:06:35 [DEBUG] train episode 5853: reward = 200.00, steps = 200\n",
      "00:06:35 [DEBUG] train episode 5854: reward = 133.00, steps = 133\n",
      "00:06:35 [DEBUG] train episode 5855: reward = 200.00, steps = 200\n",
      "00:06:36 [DEBUG] train episode 5856: reward = 179.00, steps = 179\n",
      "00:06:36 [DEBUG] train episode 5857: reward = 148.00, steps = 148\n",
      "00:06:36 [DEBUG] train episode 5858: reward = 186.00, steps = 186\n",
      "00:06:36 [DEBUG] train episode 5859: reward = 125.00, steps = 125\n",
      "00:06:36 [DEBUG] train episode 5860: reward = 134.00, steps = 134\n",
      "00:06:36 [DEBUG] train episode 5861: reward = 131.00, steps = 131\n",
      "00:06:36 [DEBUG] train episode 5862: reward = 190.00, steps = 190\n",
      "00:06:36 [DEBUG] train episode 5863: reward = 130.00, steps = 130\n",
      "00:06:36 [DEBUG] train episode 5864: reward = 127.00, steps = 127\n",
      "00:06:36 [DEBUG] train episode 5865: reward = 200.00, steps = 200\n",
      "00:06:36 [DEBUG] train episode 5866: reward = 167.00, steps = 167\n",
      "00:06:36 [DEBUG] train episode 5867: reward = 163.00, steps = 163\n",
      "00:06:36 [DEBUG] train episode 5868: reward = 175.00, steps = 175\n",
      "00:06:36 [DEBUG] train episode 5869: reward = 200.00, steps = 200\n",
      "00:06:36 [DEBUG] train episode 5870: reward = 157.00, steps = 157\n",
      "00:06:36 [DEBUG] train episode 5871: reward = 168.00, steps = 168\n",
      "00:06:36 [DEBUG] train episode 5872: reward = 142.00, steps = 142\n",
      "00:06:36 [DEBUG] train episode 5873: reward = 193.00, steps = 193\n",
      "00:06:36 [DEBUG] train episode 5874: reward = 193.00, steps = 193\n",
      "00:06:36 [DEBUG] train episode 5875: reward = 200.00, steps = 200\n",
      "00:06:37 [DEBUG] train episode 5876: reward = 200.00, steps = 200\n",
      "00:06:37 [DEBUG] train episode 5877: reward = 166.00, steps = 166\n",
      "00:06:37 [DEBUG] train episode 5878: reward = 166.00, steps = 166\n",
      "00:06:37 [DEBUG] train episode 5879: reward = 129.00, steps = 129\n",
      "00:06:37 [DEBUG] train episode 5880: reward = 200.00, steps = 200\n",
      "00:06:37 [DEBUG] train episode 5881: reward = 173.00, steps = 173\n",
      "00:06:37 [DEBUG] train episode 5882: reward = 163.00, steps = 163\n",
      "00:06:37 [DEBUG] train episode 5883: reward = 132.00, steps = 132\n",
      "00:06:37 [DEBUG] train episode 5884: reward = 200.00, steps = 200\n",
      "00:06:37 [DEBUG] train episode 5885: reward = 141.00, steps = 141\n",
      "00:06:37 [DEBUG] train episode 5886: reward = 144.00, steps = 144\n",
      "00:06:37 [DEBUG] train episode 5887: reward = 200.00, steps = 200\n",
      "00:06:37 [DEBUG] train episode 5888: reward = 151.00, steps = 151\n",
      "00:06:37 [DEBUG] train episode 5889: reward = 123.00, steps = 123\n",
      "00:06:37 [DEBUG] train episode 5890: reward = 182.00, steps = 182\n",
      "00:06:37 [DEBUG] train episode 5891: reward = 147.00, steps = 147\n",
      "00:06:37 [DEBUG] train episode 5892: reward = 167.00, steps = 167\n",
      "00:06:37 [DEBUG] train episode 5893: reward = 156.00, steps = 156\n",
      "00:06:37 [DEBUG] train episode 5894: reward = 135.00, steps = 135\n",
      "00:06:37 [DEBUG] train episode 5895: reward = 171.00, steps = 171\n",
      "00:06:38 [DEBUG] train episode 5896: reward = 141.00, steps = 141\n",
      "00:06:38 [DEBUG] train episode 5897: reward = 200.00, steps = 200\n",
      "00:06:38 [DEBUG] train episode 5898: reward = 135.00, steps = 135\n",
      "00:06:38 [DEBUG] train episode 5899: reward = 169.00, steps = 169\n",
      "00:06:38 [DEBUG] train episode 5900: reward = 170.00, steps = 170\n",
      "00:06:38 [DEBUG] train episode 5901: reward = 200.00, steps = 200\n",
      "00:06:38 [DEBUG] train episode 5902: reward = 200.00, steps = 200\n",
      "00:06:38 [DEBUG] train episode 5903: reward = 137.00, steps = 137\n",
      "00:06:38 [DEBUG] train episode 5904: reward = 133.00, steps = 133\n",
      "00:06:38 [DEBUG] train episode 5905: reward = 200.00, steps = 200\n",
      "00:06:38 [DEBUG] train episode 5906: reward = 179.00, steps = 179\n",
      "00:06:38 [DEBUG] train episode 5907: reward = 177.00, steps = 177\n",
      "00:06:38 [DEBUG] train episode 5908: reward = 148.00, steps = 148\n",
      "00:06:38 [DEBUG] train episode 5909: reward = 200.00, steps = 200\n",
      "00:06:38 [DEBUG] train episode 5910: reward = 174.00, steps = 174\n",
      "00:06:38 [DEBUG] train episode 5911: reward = 144.00, steps = 144\n",
      "00:06:38 [DEBUG] train episode 5912: reward = 173.00, steps = 173\n",
      "00:06:38 [DEBUG] train episode 5913: reward = 181.00, steps = 181\n",
      "00:06:38 [DEBUG] train episode 5914: reward = 147.00, steps = 147\n",
      "00:06:39 [DEBUG] train episode 5915: reward = 155.00, steps = 155\n",
      "00:06:39 [DEBUG] train episode 5916: reward = 179.00, steps = 179\n",
      "00:06:39 [DEBUG] train episode 5917: reward = 200.00, steps = 200\n",
      "00:06:39 [DEBUG] train episode 5918: reward = 166.00, steps = 166\n",
      "00:06:39 [DEBUG] train episode 5919: reward = 200.00, steps = 200\n",
      "00:06:39 [DEBUG] train episode 5920: reward = 197.00, steps = 197\n",
      "00:06:39 [DEBUG] train episode 5921: reward = 159.00, steps = 159\n",
      "00:06:39 [DEBUG] train episode 5922: reward = 164.00, steps = 164\n",
      "00:06:39 [DEBUG] train episode 5923: reward = 148.00, steps = 148\n",
      "00:06:39 [DEBUG] train episode 5924: reward = 192.00, steps = 192\n",
      "00:06:39 [DEBUG] train episode 5925: reward = 154.00, steps = 154\n",
      "00:06:39 [DEBUG] train episode 5926: reward = 181.00, steps = 181\n",
      "00:06:39 [DEBUG] train episode 5927: reward = 165.00, steps = 165\n",
      "00:06:39 [DEBUG] train episode 5928: reward = 197.00, steps = 197\n",
      "00:06:39 [DEBUG] train episode 5929: reward = 150.00, steps = 150\n",
      "00:06:39 [DEBUG] train episode 5930: reward = 194.00, steps = 194\n",
      "00:06:39 [DEBUG] train episode 5931: reward = 138.00, steps = 138\n",
      "00:06:39 [DEBUG] train episode 5932: reward = 200.00, steps = 200\n",
      "00:06:39 [DEBUG] train episode 5933: reward = 158.00, steps = 158\n",
      "00:06:40 [DEBUG] train episode 5934: reward = 193.00, steps = 193\n",
      "00:06:40 [DEBUG] train episode 5935: reward = 139.00, steps = 139\n",
      "00:06:40 [DEBUG] train episode 5936: reward = 157.00, steps = 157\n",
      "00:06:40 [DEBUG] train episode 5937: reward = 156.00, steps = 156\n",
      "00:06:40 [DEBUG] train episode 5938: reward = 148.00, steps = 148\n",
      "00:06:40 [DEBUG] train episode 5939: reward = 172.00, steps = 172\n",
      "00:06:40 [DEBUG] train episode 5940: reward = 200.00, steps = 200\n",
      "00:06:40 [DEBUG] train episode 5941: reward = 200.00, steps = 200\n",
      "00:06:40 [DEBUG] train episode 5942: reward = 163.00, steps = 163\n",
      "00:06:40 [DEBUG] train episode 5943: reward = 146.00, steps = 146\n",
      "00:06:40 [DEBUG] train episode 5944: reward = 200.00, steps = 200\n",
      "00:06:40 [DEBUG] train episode 5945: reward = 200.00, steps = 200\n",
      "00:06:40 [DEBUG] train episode 5946: reward = 166.00, steps = 166\n",
      "00:06:40 [DEBUG] train episode 5947: reward = 167.00, steps = 167\n",
      "00:06:40 [DEBUG] train episode 5948: reward = 200.00, steps = 200\n",
      "00:06:40 [DEBUG] train episode 5949: reward = 200.00, steps = 200\n",
      "00:06:40 [DEBUG] train episode 5950: reward = 187.00, steps = 187\n",
      "00:06:40 [DEBUG] train episode 5951: reward = 200.00, steps = 200\n",
      "00:06:40 [DEBUG] train episode 5952: reward = 194.00, steps = 194\n",
      "00:06:41 [DEBUG] train episode 5953: reward = 165.00, steps = 165\n",
      "00:06:41 [DEBUG] train episode 5954: reward = 200.00, steps = 200\n",
      "00:06:41 [DEBUG] train episode 5955: reward = 200.00, steps = 200\n",
      "00:06:41 [DEBUG] train episode 5956: reward = 174.00, steps = 174\n",
      "00:06:41 [DEBUG] train episode 5957: reward = 161.00, steps = 161\n",
      "00:06:41 [DEBUG] train episode 5958: reward = 195.00, steps = 195\n",
      "00:06:41 [DEBUG] train episode 5959: reward = 180.00, steps = 180\n",
      "00:06:41 [DEBUG] train episode 5960: reward = 164.00, steps = 164\n",
      "00:06:41 [DEBUG] train episode 5961: reward = 190.00, steps = 190\n",
      "00:06:41 [DEBUG] train episode 5962: reward = 179.00, steps = 179\n",
      "00:06:41 [DEBUG] train episode 5963: reward = 146.00, steps = 146\n",
      "00:06:41 [DEBUG] train episode 5964: reward = 148.00, steps = 148\n",
      "00:06:41 [DEBUG] train episode 5965: reward = 200.00, steps = 200\n",
      "00:06:41 [DEBUG] train episode 5966: reward = 164.00, steps = 164\n",
      "00:06:41 [DEBUG] train episode 5967: reward = 200.00, steps = 200\n",
      "00:06:41 [DEBUG] train episode 5968: reward = 162.00, steps = 162\n",
      "00:06:41 [DEBUG] train episode 5969: reward = 200.00, steps = 200\n",
      "00:06:41 [DEBUG] train episode 5970: reward = 179.00, steps = 179\n",
      "00:06:42 [DEBUG] train episode 5971: reward = 200.00, steps = 200\n",
      "00:06:42 [DEBUG] train episode 5972: reward = 142.00, steps = 142\n",
      "00:06:42 [DEBUG] train episode 5973: reward = 200.00, steps = 200\n",
      "00:06:42 [DEBUG] train episode 5974: reward = 154.00, steps = 154\n",
      "00:06:42 [DEBUG] train episode 5975: reward = 140.00, steps = 140\n",
      "00:06:42 [DEBUG] train episode 5976: reward = 161.00, steps = 161\n",
      "00:06:42 [DEBUG] train episode 5977: reward = 171.00, steps = 171\n",
      "00:06:42 [DEBUG] train episode 5978: reward = 200.00, steps = 200\n",
      "00:06:42 [DEBUG] train episode 5979: reward = 200.00, steps = 200\n",
      "00:06:42 [DEBUG] train episode 5980: reward = 196.00, steps = 196\n",
      "00:06:42 [DEBUG] train episode 5981: reward = 189.00, steps = 189\n",
      "00:06:42 [DEBUG] train episode 5982: reward = 200.00, steps = 200\n",
      "00:06:42 [DEBUG] train episode 5983: reward = 151.00, steps = 151\n",
      "00:06:42 [DEBUG] train episode 5984: reward = 172.00, steps = 172\n",
      "00:06:42 [DEBUG] train episode 5985: reward = 190.00, steps = 190\n",
      "00:06:42 [DEBUG] train episode 5986: reward = 164.00, steps = 164\n",
      "00:06:42 [DEBUG] train episode 5987: reward = 181.00, steps = 181\n",
      "00:06:42 [DEBUG] train episode 5988: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 5989: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 5990: reward = 151.00, steps = 151\n",
      "00:06:43 [DEBUG] train episode 5991: reward = 174.00, steps = 174\n",
      "00:06:43 [DEBUG] train episode 5992: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 5993: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 5994: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 5995: reward = 169.00, steps = 169\n",
      "00:06:43 [DEBUG] train episode 5996: reward = 190.00, steps = 190\n",
      "00:06:43 [DEBUG] train episode 5997: reward = 194.00, steps = 194\n",
      "00:06:43 [DEBUG] train episode 5998: reward = 174.00, steps = 174\n",
      "00:06:43 [DEBUG] train episode 5999: reward = 153.00, steps = 153\n",
      "00:06:43 [DEBUG] train episode 6000: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 6001: reward = 143.00, steps = 143\n",
      "00:06:43 [DEBUG] train episode 6002: reward = 192.00, steps = 192\n",
      "00:06:43 [DEBUG] train episode 6003: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 6004: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 6005: reward = 195.00, steps = 195\n",
      "00:06:43 [DEBUG] train episode 6006: reward = 200.00, steps = 200\n",
      "00:06:43 [DEBUG] train episode 6007: reward = 200.00, steps = 200\n",
      "00:06:44 [DEBUG] train episode 6008: reward = 163.00, steps = 163\n",
      "00:06:44 [DEBUG] train episode 6009: reward = 200.00, steps = 200\n",
      "00:06:44 [DEBUG] train episode 6010: reward = 190.00, steps = 190\n",
      "00:06:44 [DEBUG] train episode 6011: reward = 156.00, steps = 156\n",
      "00:06:44 [DEBUG] train episode 6012: reward = 161.00, steps = 161\n",
      "00:06:44 [DEBUG] train episode 6013: reward = 200.00, steps = 200\n",
      "00:06:44 [DEBUG] train episode 6014: reward = 200.00, steps = 200\n",
      "00:06:44 [DEBUG] train episode 6015: reward = 200.00, steps = 200\n",
      "00:06:44 [DEBUG] train episode 6016: reward = 200.00, steps = 200\n",
      "00:06:44 [DEBUG] train episode 6017: reward = 181.00, steps = 181\n",
      "00:06:44 [DEBUG] train episode 6018: reward = 173.00, steps = 173\n",
      "00:06:44 [DEBUG] train episode 6019: reward = 152.00, steps = 152\n",
      "00:06:44 [DEBUG] train episode 6020: reward = 151.00, steps = 151\n",
      "00:06:44 [DEBUG] train episode 6021: reward = 200.00, steps = 200\n",
      "00:06:44 [DEBUG] train episode 6022: reward = 147.00, steps = 147\n",
      "00:06:44 [DEBUG] train episode 6023: reward = 128.00, steps = 128\n",
      "00:06:44 [DEBUG] train episode 6024: reward = 170.00, steps = 170\n",
      "00:06:44 [DEBUG] train episode 6025: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6026: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6027: reward = 144.00, steps = 144\n",
      "00:06:45 [DEBUG] train episode 6028: reward = 142.00, steps = 142\n",
      "00:06:45 [DEBUG] train episode 6029: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6030: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6031: reward = 195.00, steps = 195\n",
      "00:06:45 [DEBUG] train episode 6032: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6033: reward = 146.00, steps = 146\n",
      "00:06:45 [DEBUG] train episode 6034: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6035: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6036: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6037: reward = 200.00, steps = 200\n",
      "00:06:45 [DEBUG] train episode 6038: reward = 190.00, steps = 190\n",
      "00:06:45 [DEBUG] train episode 6039: reward = 132.00, steps = 132\n",
      "00:06:45 [DEBUG] train episode 6040: reward = 178.00, steps = 178\n",
      "00:06:45 [DEBUG] train episode 6041: reward = 180.00, steps = 180\n",
      "00:06:45 [DEBUG] train episode 6042: reward = 193.00, steps = 193\n",
      "00:06:45 [DEBUG] train episode 6043: reward = 164.00, steps = 164\n",
      "00:06:45 [DEBUG] train episode 6044: reward = 179.00, steps = 179\n",
      "00:06:46 [DEBUG] train episode 6045: reward = 181.00, steps = 181\n",
      "00:06:46 [DEBUG] train episode 6046: reward = 200.00, steps = 200\n",
      "00:06:46 [DEBUG] train episode 6047: reward = 200.00, steps = 200\n",
      "00:06:46 [DEBUG] train episode 6048: reward = 170.00, steps = 170\n",
      "00:06:46 [DEBUG] train episode 6049: reward = 133.00, steps = 133\n",
      "00:06:46 [DEBUG] train episode 6050: reward = 200.00, steps = 200\n",
      "00:06:46 [DEBUG] train episode 6051: reward = 167.00, steps = 167\n",
      "00:06:46 [DEBUG] train episode 6052: reward = 200.00, steps = 200\n",
      "00:06:46 [DEBUG] train episode 6053: reward = 195.00, steps = 195\n",
      "00:06:46 [DEBUG] train episode 6054: reward = 200.00, steps = 200\n",
      "00:06:46 [DEBUG] train episode 6055: reward = 180.00, steps = 180\n",
      "00:06:46 [DEBUG] train episode 6056: reward = 200.00, steps = 200\n",
      "00:06:46 [DEBUG] train episode 6057: reward = 143.00, steps = 143\n",
      "00:06:46 [DEBUG] train episode 6058: reward = 198.00, steps = 198\n",
      "00:06:46 [DEBUG] train episode 6059: reward = 163.00, steps = 163\n",
      "00:06:46 [DEBUG] train episode 6060: reward = 186.00, steps = 186\n",
      "00:06:46 [DEBUG] train episode 6061: reward = 200.00, steps = 200\n",
      "00:06:46 [DEBUG] train episode 6062: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6063: reward = 182.00, steps = 182\n",
      "00:06:47 [DEBUG] train episode 6064: reward = 161.00, steps = 161\n",
      "00:06:47 [DEBUG] train episode 6065: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6066: reward = 163.00, steps = 163\n",
      "00:06:47 [DEBUG] train episode 6067: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6068: reward = 142.00, steps = 142\n",
      "00:06:47 [DEBUG] train episode 6069: reward = 176.00, steps = 176\n",
      "00:06:47 [DEBUG] train episode 6070: reward = 147.00, steps = 147\n",
      "00:06:47 [DEBUG] train episode 6071: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6072: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6073: reward = 163.00, steps = 163\n",
      "00:06:47 [DEBUG] train episode 6074: reward = 190.00, steps = 190\n",
      "00:06:47 [DEBUG] train episode 6075: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6076: reward = 159.00, steps = 159\n",
      "00:06:47 [DEBUG] train episode 6077: reward = 164.00, steps = 164\n",
      "00:06:47 [DEBUG] train episode 6078: reward = 190.00, steps = 190\n",
      "00:06:47 [DEBUG] train episode 6079: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6080: reward = 200.00, steps = 200\n",
      "00:06:47 [DEBUG] train episode 6081: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6082: reward = 173.00, steps = 173\n",
      "00:06:48 [DEBUG] train episode 6083: reward = 175.00, steps = 175\n",
      "00:06:48 [DEBUG] train episode 6084: reward = 196.00, steps = 196\n",
      "00:06:48 [DEBUG] train episode 6085: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6086: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6087: reward = 161.00, steps = 161\n",
      "00:06:48 [DEBUG] train episode 6088: reward = 170.00, steps = 170\n",
      "00:06:48 [DEBUG] train episode 6089: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6090: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6091: reward = 165.00, steps = 165\n",
      "00:06:48 [DEBUG] train episode 6092: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6093: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6094: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6095: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6096: reward = 184.00, steps = 184\n",
      "00:06:48 [DEBUG] train episode 6097: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6098: reward = 200.00, steps = 200\n",
      "00:06:48 [DEBUG] train episode 6099: reward = 139.00, steps = 139\n",
      "00:06:49 [DEBUG] train episode 6100: reward = 189.00, steps = 189\n",
      "00:06:49 [DEBUG] train episode 6101: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6102: reward = 193.00, steps = 193\n",
      "00:06:49 [DEBUG] train episode 6103: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6104: reward = 183.00, steps = 183\n",
      "00:06:49 [DEBUG] train episode 6105: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6106: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6107: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6108: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6109: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6110: reward = 173.00, steps = 173\n",
      "00:06:49 [DEBUG] train episode 6111: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6112: reward = 178.00, steps = 178\n",
      "00:06:49 [DEBUG] train episode 6113: reward = 195.00, steps = 195\n",
      "00:06:49 [DEBUG] train episode 6114: reward = 200.00, steps = 200\n",
      "00:06:49 [DEBUG] train episode 6115: reward = 151.00, steps = 151\n",
      "00:06:49 [DEBUG] train episode 6116: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6117: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6118: reward = 175.00, steps = 175\n",
      "00:06:50 [DEBUG] train episode 6119: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6120: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6121: reward = 161.00, steps = 161\n",
      "00:06:50 [DEBUG] train episode 6122: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6123: reward = 198.00, steps = 198\n",
      "00:06:50 [DEBUG] train episode 6124: reward = 166.00, steps = 166\n",
      "00:06:50 [DEBUG] train episode 6125: reward = 174.00, steps = 174\n",
      "00:06:50 [DEBUG] train episode 6126: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6127: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6128: reward = 199.00, steps = 199\n",
      "00:06:50 [DEBUG] train episode 6129: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6130: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6131: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6132: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6133: reward = 200.00, steps = 200\n",
      "00:06:50 [DEBUG] train episode 6134: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6135: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6136: reward = 140.00, steps = 140\n",
      "00:06:51 [DEBUG] train episode 6137: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6138: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6139: reward = 183.00, steps = 183\n",
      "00:06:51 [DEBUG] train episode 6140: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6141: reward = 181.00, steps = 181\n",
      "00:06:51 [DEBUG] train episode 6142: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6143: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6144: reward = 175.00, steps = 175\n",
      "00:06:51 [DEBUG] train episode 6145: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6146: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6147: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6148: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6149: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6150: reward = 200.00, steps = 200\n",
      "00:06:51 [DEBUG] train episode 6151: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6152: reward = 194.00, steps = 194\n",
      "00:06:52 [DEBUG] train episode 6153: reward = 172.00, steps = 172\n",
      "00:06:52 [DEBUG] train episode 6154: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6155: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6156: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6157: reward = 192.00, steps = 192\n",
      "00:06:52 [DEBUG] train episode 6158: reward = 191.00, steps = 191\n",
      "00:06:52 [DEBUG] train episode 6159: reward = 180.00, steps = 180\n",
      "00:06:52 [DEBUG] train episode 6160: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6161: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6162: reward = 195.00, steps = 195\n",
      "00:06:52 [DEBUG] train episode 6163: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6164: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6165: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6166: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6167: reward = 200.00, steps = 200\n",
      "00:06:52 [DEBUG] train episode 6168: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6169: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6170: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6171: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6172: reward = 182.00, steps = 182\n",
      "00:06:53 [DEBUG] train episode 6173: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6174: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6175: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6176: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6177: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6178: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6179: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6180: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6181: reward = 176.00, steps = 176\n",
      "00:06:53 [DEBUG] train episode 6182: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6183: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6184: reward = 200.00, steps = 200\n",
      "00:06:53 [DEBUG] train episode 6185: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6186: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6187: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6188: reward = 197.00, steps = 197\n",
      "00:06:54 [DEBUG] train episode 6189: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6190: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6191: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6192: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6193: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6194: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6195: reward = 168.00, steps = 168\n",
      "00:06:54 [DEBUG] train episode 6196: reward = 175.00, steps = 175\n",
      "00:06:54 [DEBUG] train episode 6197: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6198: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6199: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6200: reward = 200.00, steps = 200\n",
      "00:06:54 [DEBUG] train episode 6201: reward = 170.00, steps = 170\n",
      "00:06:54 [DEBUG] train episode 6202: reward = 183.00, steps = 183\n",
      "00:06:54 [DEBUG] train episode 6203: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6204: reward = 191.00, steps = 191\n",
      "00:06:55 [DEBUG] train episode 6205: reward = 181.00, steps = 181\n",
      "00:06:55 [DEBUG] train episode 6206: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6207: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6208: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6209: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6210: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6211: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6212: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6213: reward = 171.00, steps = 171\n",
      "00:06:55 [DEBUG] train episode 6214: reward = 180.00, steps = 180\n",
      "00:06:55 [DEBUG] train episode 6215: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6216: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6217: reward = 200.00, steps = 200\n",
      "00:06:55 [DEBUG] train episode 6218: reward = 178.00, steps = 178\n",
      "00:06:55 [DEBUG] train episode 6219: reward = 196.00, steps = 196\n",
      "00:06:55 [DEBUG] train episode 6220: reward = 191.00, steps = 191\n",
      "00:06:56 [DEBUG] train episode 6221: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6222: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6223: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6224: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6225: reward = 177.00, steps = 177\n",
      "00:06:56 [DEBUG] train episode 6226: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6227: reward = 194.00, steps = 194\n",
      "00:06:56 [DEBUG] train episode 6228: reward = 190.00, steps = 190\n",
      "00:06:56 [DEBUG] train episode 6229: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6230: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6231: reward = 187.00, steps = 187\n",
      "00:06:56 [DEBUG] train episode 6232: reward = 199.00, steps = 199\n",
      "00:06:56 [DEBUG] train episode 6233: reward = 190.00, steps = 190\n",
      "00:06:56 [DEBUG] train episode 6234: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6235: reward = 192.00, steps = 192\n",
      "00:06:56 [DEBUG] train episode 6236: reward = 200.00, steps = 200\n",
      "00:06:56 [DEBUG] train episode 6237: reward = 170.00, steps = 170\n",
      "00:06:56 [DEBUG] train episode 6238: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6239: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6240: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6241: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6242: reward = 195.00, steps = 195\n",
      "00:06:57 [DEBUG] train episode 6243: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6244: reward = 156.00, steps = 156\n",
      "00:06:57 [DEBUG] train episode 6245: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6246: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6247: reward = 153.00, steps = 153\n",
      "00:06:57 [DEBUG] train episode 6248: reward = 175.00, steps = 175\n",
      "00:06:57 [DEBUG] train episode 6249: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6250: reward = 185.00, steps = 185\n",
      "00:06:57 [DEBUG] train episode 6251: reward = 152.00, steps = 152\n",
      "00:06:57 [DEBUG] train episode 6252: reward = 200.00, steps = 200\n",
      "00:06:57 [DEBUG] train episode 6253: reward = 194.00, steps = 194\n",
      "00:06:57 [DEBUG] train episode 6254: reward = 171.00, steps = 171\n",
      "00:06:57 [DEBUG] train episode 6255: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6256: reward = 181.00, steps = 181\n",
      "00:06:58 [DEBUG] train episode 6257: reward = 166.00, steps = 166\n",
      "00:06:58 [DEBUG] train episode 6258: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6259: reward = 191.00, steps = 191\n",
      "00:06:58 [DEBUG] train episode 6260: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6261: reward = 198.00, steps = 198\n",
      "00:06:58 [DEBUG] train episode 6262: reward = 184.00, steps = 184\n",
      "00:06:58 [DEBUG] train episode 6263: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6264: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6265: reward = 154.00, steps = 154\n",
      "00:06:58 [DEBUG] train episode 6266: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6267: reward = 164.00, steps = 164\n",
      "00:06:58 [DEBUG] train episode 6268: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6269: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6270: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6271: reward = 200.00, steps = 200\n",
      "00:06:58 [DEBUG] train episode 6272: reward = 162.00, steps = 162\n",
      "00:06:58 [DEBUG] train episode 6273: reward = 160.00, steps = 160\n",
      "00:06:59 [DEBUG] train episode 6274: reward = 200.00, steps = 200\n",
      "00:06:59 [DEBUG] train episode 6275: reward = 200.00, steps = 200\n",
      "00:06:59 [DEBUG] train episode 6276: reward = 197.00, steps = 197\n",
      "00:06:59 [DEBUG] train episode 6277: reward = 196.00, steps = 196\n",
      "00:06:59 [DEBUG] train episode 6278: reward = 161.00, steps = 161\n",
      "00:06:59 [DEBUG] train episode 6279: reward = 172.00, steps = 172\n",
      "00:06:59 [DEBUG] train episode 6280: reward = 138.00, steps = 138\n",
      "00:06:59 [DEBUG] train episode 6281: reward = 161.00, steps = 161\n",
      "00:06:59 [DEBUG] train episode 6282: reward = 200.00, steps = 200\n",
      "00:06:59 [DEBUG] train episode 6283: reward = 155.00, steps = 155\n",
      "00:06:59 [DEBUG] train episode 6284: reward = 177.00, steps = 177\n",
      "00:06:59 [DEBUG] train episode 6285: reward = 196.00, steps = 196\n",
      "00:06:59 [DEBUG] train episode 6286: reward = 200.00, steps = 200\n",
      "00:06:59 [DEBUG] train episode 6287: reward = 158.00, steps = 158\n",
      "00:06:59 [DEBUG] train episode 6288: reward = 150.00, steps = 150\n",
      "00:06:59 [DEBUG] train episode 6289: reward = 200.00, steps = 200\n",
      "00:06:59 [DEBUG] train episode 6290: reward = 191.00, steps = 191\n",
      "00:06:59 [DEBUG] train episode 6291: reward = 185.00, steps = 185\n",
      "00:06:59 [DEBUG] train episode 6292: reward = 193.00, steps = 193\n",
      "00:07:00 [DEBUG] train episode 6293: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6294: reward = 177.00, steps = 177\n",
      "00:07:00 [DEBUG] train episode 6295: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6296: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6297: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6298: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6299: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6300: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6301: reward = 169.00, steps = 169\n",
      "00:07:00 [DEBUG] train episode 6302: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6303: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6304: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6305: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6306: reward = 177.00, steps = 177\n",
      "00:07:00 [DEBUG] train episode 6307: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6308: reward = 200.00, steps = 200\n",
      "00:07:00 [DEBUG] train episode 6309: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6310: reward = 190.00, steps = 190\n",
      "00:07:01 [DEBUG] train episode 6311: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6312: reward = 182.00, steps = 182\n",
      "00:07:01 [DEBUG] train episode 6313: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6314: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6315: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6316: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6317: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6318: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6319: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6320: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6321: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6322: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6323: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6324: reward = 200.00, steps = 200\n",
      "00:07:01 [DEBUG] train episode 6325: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6326: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6327: reward = 166.00, steps = 166\n",
      "00:07:02 [DEBUG] train episode 6328: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6329: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6330: reward = 197.00, steps = 197\n",
      "00:07:02 [DEBUG] train episode 6331: reward = 191.00, steps = 191\n",
      "00:07:02 [DEBUG] train episode 6332: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6333: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6334: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6335: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6336: reward = 195.00, steps = 195\n",
      "00:07:02 [DEBUG] train episode 6337: reward = 172.00, steps = 172\n",
      "00:07:02 [DEBUG] train episode 6338: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6339: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6340: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6341: reward = 181.00, steps = 181\n",
      "00:07:02 [DEBUG] train episode 6342: reward = 200.00, steps = 200\n",
      "00:07:02 [DEBUG] train episode 6343: reward = 161.00, steps = 161\n",
      "00:07:03 [DEBUG] train episode 6344: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6345: reward = 181.00, steps = 181\n",
      "00:07:03 [DEBUG] train episode 6346: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6347: reward = 193.00, steps = 193\n",
      "00:07:03 [DEBUG] train episode 6348: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6349: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6350: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6351: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6352: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6353: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6354: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6355: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6356: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6357: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6358: reward = 200.00, steps = 200\n",
      "00:07:03 [DEBUG] train episode 6359: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] train episode 6360: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] train episode 6361: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] train episode 6362: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] train episode 6363: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] train episode 6364: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] train episode 6365: reward = 200.00, steps = 200\n",
      "00:07:04 [INFO] ==== test ====\n",
      "00:07:04 [DEBUG] test episode 0: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 1: reward = 184.00, steps = 184\n",
      "00:07:04 [DEBUG] test episode 2: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 3: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 4: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 5: reward = 194.00, steps = 194\n",
      "00:07:04 [DEBUG] test episode 6: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 7: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 8: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 9: reward = 200.00, steps = 200\n",
      "00:07:04 [DEBUG] test episode 10: reward = 168.00, steps = 168\n",
      "00:07:04 [DEBUG] test episode 11: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 12: reward = 182.00, steps = 182\n",
      "00:07:05 [DEBUG] test episode 13: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 14: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 15: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 16: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 17: reward = 196.00, steps = 196\n",
      "00:07:05 [DEBUG] test episode 18: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 19: reward = 176.00, steps = 176\n",
      "00:07:05 [DEBUG] test episode 20: reward = 188.00, steps = 188\n",
      "00:07:05 [DEBUG] test episode 21: reward = 188.00, steps = 188\n",
      "00:07:05 [DEBUG] test episode 22: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 23: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 24: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 25: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 26: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 27: reward = 200.00, steps = 200\n",
      "00:07:05 [DEBUG] test episode 28: reward = 185.00, steps = 185\n",
      "00:07:05 [DEBUG] test episode 29: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 30: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 31: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 32: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 33: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 34: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 35: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 36: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 37: reward = 188.00, steps = 188\n",
      "00:07:06 [DEBUG] test episode 38: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 39: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 40: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 41: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 42: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 43: reward = 164.00, steps = 164\n",
      "00:07:06 [DEBUG] test episode 44: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 45: reward = 200.00, steps = 200\n",
      "00:07:06 [DEBUG] test episode 46: reward = 184.00, steps = 184\n",
      "00:07:07 [DEBUG] test episode 47: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 48: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 49: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 50: reward = 193.00, steps = 193\n",
      "00:07:07 [DEBUG] test episode 51: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 52: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 53: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 54: reward = 198.00, steps = 198\n",
      "00:07:07 [DEBUG] test episode 55: reward = 199.00, steps = 199\n",
      "00:07:07 [DEBUG] test episode 56: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 57: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 58: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 59: reward = 187.00, steps = 187\n",
      "00:07:07 [DEBUG] test episode 60: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 61: reward = 184.00, steps = 184\n",
      "00:07:07 [DEBUG] test episode 62: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 63: reward = 200.00, steps = 200\n",
      "00:07:07 [DEBUG] test episode 64: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 65: reward = 188.00, steps = 188\n",
      "00:07:08 [DEBUG] test episode 66: reward = 169.00, steps = 169\n",
      "00:07:08 [DEBUG] test episode 67: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 68: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 69: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 70: reward = 177.00, steps = 177\n",
      "00:07:08 [DEBUG] test episode 71: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 72: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 73: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 74: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 75: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 76: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 77: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 78: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 79: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 80: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 81: reward = 200.00, steps = 200\n",
      "00:07:08 [DEBUG] test episode 82: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 83: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 84: reward = 189.00, steps = 189\n",
      "00:07:09 [DEBUG] test episode 85: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 86: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 87: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 88: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 89: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 90: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 91: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 92: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 93: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 94: reward = 184.00, steps = 184\n",
      "00:07:09 [DEBUG] test episode 95: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 96: reward = 197.00, steps = 197\n",
      "00:07:09 [DEBUG] test episode 97: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 98: reward = 200.00, steps = 200\n",
      "00:07:09 [DEBUG] test episode 99: reward = 200.00, steps = 200\n",
      "00:07:09 [INFO] average episode reward = 196.62 ± 7.61\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAArsUlEQVR4nO3deXxU5b348c83OyEJIRAgkGBYwr4bNhFkUUBQqVQR7EWsC9deaLVaW7S7vVSuVdvaXq20eq2/W1DrUq27tVXqlUpBwYKAgERAESIuoCBLeH5/5AxMkjP7OTNzznzfr1demXnO9p1k5jvPec5znkeMMSillPKXrFQHoJRSynma3JVSyoc0uSullA9pcldKKR/S5K6UUj6Uk+oAANq3b2+qq6tTHYZSSnnKmjVrPjTGlNstS4vkXl1dzerVq1MdhlJKeYqIvBtqmTbLKKWUD2lyV0opH9LkrpRSPqTJXSmlfEiTu1JK+VDE5C4iVSLyNxHZKCIbRORqq7xMRF4QkS3W77ZB29wgIltFZLOITHHzBSillGopmpr7MeA6Y0xfYBSwQET6AYuAF40xNcCL1nOsZbOB/sBU4E4RyXYjeKWUUvYi9nM3xuwGdluPD4jIRqALMAMYb632e+Al4DtW+QPGmMPAdhHZCowAVjodfDr6+PMjvPDWHvJzs5gxpItj+1278xN2f3KIf7yzjwUTetKhpIDH3tjF5H6dWP3ux+z77DAA2+o/Iy87m/Xvf8rCCT2Z8d//B8B3pvahMC+bvQe+oFfHYt7dd5DsLOHpf+1mw/v7mT28igf+uZPTerTj1W37bGPo06mYTR8caFJ2Rq9yXn67HoCBXdowd9Qp1O37nDtf2gbAtWf14pR2hVz9wFom9unAOYMqOPDFMW568i2KC3KYO+oUBlWWUtOhiPG3vsQ5gyro1bGY2194m5tm9OeOF7ey9JJTuePFLby0uZ7+nUsoLcxl/6FjFOXnsPKdfQyuKmXdzk9axDuqexmDq0q595XtTOzTgVe37uOcwZ1ZvmpH1H/3EdVl7PjoIB/s/yLiuoMr27Bu16chl/fsUMSOfQeZMqATf924h8+PNABQXJDD7bOGcMOjb1JVVkjP8iJGdW/HdX9cx0W1VTy4eidZAseDRufuVFLAdZN7cf3Db9KppCCq+CKZ0Lucv22uP/F8/rjuLF3xDjdO68NLm+tPvC+6l7emU0lByPeJnc5tClg0rS/XPPBGk9cRjQtOreThNbvoW1FC/YHDjOxWxscHj3BhbSXffHAdRfk5fHb4mO22v5ozlB//eQPfP6cfJa1yeXlzPSu21JOfk82AziW8tv0jdnx00Pa9nQy9Oxbz9Uk9OWdQZ8f3LbGM5y4i1cAKYACwwxhTGrTsY2NMWxH5NfAPY8z/WuX3AM8YYx5utq/5wHyArl27nvruuyH74nvKRXev5LXtHwHw5NdPZ0CXNo7st3rRUycftyvk9ouGMPPOV5k5rAuPvv6eI8dQSiXfuYM786s5Q+PaVkTWGGNq7ZZFfUFVRIqAR4BrjDH7w61qU9biG8QYs9QYU2uMqS0vt7171pPe++TQicdfHG1w5Rg7PjrI51ZN5cl1u105hlIqOeJN7JFEldxFJJfGxP4HY8yjVvEeEamwllcAe63yXUBV0OaVwPvOhJs6G97/lL0HIp/67vr4UMR1nHSk4XhSj6eU8oZoessIcA+w0Rhze9CiJ4B51uN5wONB5bNFJF9EugE1wCrnQk6N6Xe8wsRbX45pG53AUCmVKtEMHDYGmAv8S0TWWmU3AkuAh0TkcmAHcCGAMWaDiDwEvEVjT5sFxhh32ieSLNRFG6WUSjfR9JZ5Bft2dIBJIbZZDCxOIC7fqvvwczq1KaAgV3uHKqXco3eoJtHhYw2Mv/Ulrn7gjYT2o809SqlINLkn0dGGxrT8ypYPUxyJUsrvNLl7kAAx3J6glMpAmtyVUsqHNLm7yM3atYS6
xK2UUmhyV0opX9LkHqPqRU9x1IW7Qv9v64dUL3qK9e+FHnhKKaWipck9DoePOZ/cX3hrDwCrrEHHwol1VD2lVObR5K6UUj6kyV0ppXxIk7tHaT93pVQ4mtzjEG0vxFgmQlFKKSdpck8z2n9dKeUETe4uEhcz9eYUzPeolPIOTe4ucrNZ5pbnNrm2b6WU92lyj4M2nSil0p0m9zRjTGOTy81Pb9QLskqpuEUzh+q9IrJXRNYHlT0oImutn7rA9HsiUi0ih4KW/cbF2H1r9tKV3L3iHT4+eDTVoSilPCqaOVTvA34N3B8oMMZcFHgsIrcBwQOibDPGDHEovowjcnKmJW39UUrFK2LN3RizArAd8EQau4PMApY7HJdrPj98jNtfeJtjDgz+ZYzh7pe3sfvTQw5EFuuxk35IpZSHJNrmPhbYY4zZElTWTUTeEJGXRWRsqA1FZL6IrBaR1fX19QmGEb3bnn+bO17cwqNvvJfwvt7dd5Cbn9nE/PvXOBCZvftX1rFn/xeu7V8p5U/RNMuEM4emtfbdQFdjzD4RORX4k4j0N8bsb76hMWYpsBSgtrY2afXQQ0cbABwZtrfBqj5/fviY7fJEX9R7nxziB49v4OE1u1os0x47Sqlw4q65i0gOMBN4MFBmjDlsjNlnPV4DbAN6JRpkuhEXWsPtesYsW7UDgE9sLqy6EYNSyj8SaZY5E9hkjDlRrRSRchHJth53B2qAdxILMbMEp+xlr+1IWRxKqcR8b3rflB4/mq6Qy4GVQG8R2SUil1uLZtPyQuo44E0RWQc8DFxljIk8+0QSrNr+UUoufLrliAuzQSmlnHPF2O4pPX7ENndjzJwQ5ZfalD0CPJJ4WM6bdfdKWuVm86WhXVIdilJKuS6j7lANXExNVPOLmYHW8uM6/51SKk1kVHJ3WvNLmr97JfHLC9rcopS/zBnRNSXHTbQrpAqy/cODTZ7HcqPRe5809mX/6dObKC3MdTIspVSKvPPTaYjA8lXJ7xyhNfc0sf+QjiOjlN9kZYntvA7V7QrdP7brR0hTTty+33wXTt1YpD3YlfK35745zvVjZFxyd+POznC7/PTgUb6I5kJu0E70sqxS/pafk+36MTIuubsx4Fa4XQ6+6Xm+fNerzh9UKaXCyLjkHuBEDb75LkLtcsP7LYbWiWm/SikVq4xN7ulGE7pSmcfNnnHaFdJFxqHWcx0BUin/+cu1Z9BWk3tm04k5lPKfnh2KXN2/Nsu4KJZhebV2rpRyktbcHRAYi73lmDORq9xTfr6C7uWt3QhLKZXBtOYep3tf2c7E216Oa9vPjzSwde8BADbvOcAz6z/QyTeUUo7S5B6nm558K6Htn13/QZPn2iyjlHJSxib3RC5SNt82MHaE1r6VUuki45K71pCVUpkgmmn27hWRvSKyPqjsRyLynoistX6mBS27QUS2ishmEZniVuDpKJYvjljOHHZ8dDDySkql2LIrR6Y6hLQ1oXd50o8ZTc39PmCqTfnPjTFDrJ+nAUSkH41zq/a3trkzMGF2unBlbBlrp4ns+9Vt+xyKRqnUOK1Hez0zDuGOOUOZPy65c6pGTO7GmBVAtJNczwAeMMYcNsZsB7YCIxKIzzWh3oQ3P7ORNe86NKe33nyklAKKC3K5cVrfpB4zkTb3hSLyptVs09Yq6wLsDFpnl1XWgojMF5HVIrK6vr4+gTCcdffL7/Dlu1bGta3WWpRS6SLe5H4X0AMYAuwGbrPK7dKbbf3VGLPUGFNrjKktL09+e5RSSvlZXMndGLPHGNNgjDkO/JaTTS+7gKqgVSuB9xML0TucqrjbTcullFKxiCu5i0hF0NPzgUBPmieA2SKSLyLdgBpgVWIh+tdD/9xpW250pDClVIIiji0jIsuB8UB7EdkF/BAYLyJDaGxyqQP+HcAYs0FEHgLeAo4BC4wxUcwx55wtew6wec8BzhnUmRVv19M6P5tTTylLZghR+/Yjb6Y6BKUcJWg/gnQRMbkbY+bY
FN8TZv3FwOJEgkrEWT9fAcA5gzpzyb2NJw11S6a3WM+NynEszSnhDq/NMsor8nOyOHzseKrDUDb0DtU4RDsJh9ZglN81f4/rez59ZFxyd8LaHZ+kOgQVQfuivFSHkBk0m6ctTe4hbPpgP9vqP7Nd9vmRlpcRDh9r4C8b97gdllJppflZbCY1KN40o3+qQwhLJ+sIYeov/g7Yt9fbufnpTez6+JCbISmVdrRjV/rybc198wcHwi5/e0/45bEwwE4d3CutHNekkxSZ+Gce16vxpstQHR9KCnLo06k4mSHZ8m1y//TQUdvyQE3j/pXvRrWfL4428Nb7+50K64SjDdrDQHlfQU7TFHLJ6OrUBJJEv7hoCHVLppPVLLdPH9h4+8+bP5rCs9eMS0FkTfk2uTvl+offZNodf49r24Yw1cdf/XVryGUffX4kruMplWy5zZL7D8/tx6rvTkpRNMlR1tr+Yv2v5gxl6+KzI24/pmc7p0OypW3uEbz+7sdxb/urv25xMBIVi3gu7E0fWMFT/9rteCyZRETI8vF9Gpt+Yjf6eaOsLCErwjtv00+mktO8yu8Srbk7xK6Ovm7npyce793/RfKCyWDTB1bw1k3xzRFTmBd+6oGrJ9WceHxm345xHcNvbEcK9HFDfEFuYtNTFORmk5OdnLSryT0Ozd/Q0XwPj/jpixyy6UKpnJWfm0VhXk7MF/rW/XByxHWGV58cxuLOrwyL8QiZI9qb/JS7NLnHwe6tG02C/+KoJvd01aZVbsR1Tq9pf+JxXo5+dEKyPiDZDjU/PL5gjCP7cZqkea9+375DQ42s6EZzoAGOaO8XpQBnu6FWtCmgul3rqNe/qLYq8kpxalsYugJQlJ9+ly99m9zd1PyL4919B/n7lg8jbnfnS9vcCklZenYoAqBDcX6KI0kfTtWgo+Vks8y3JvemuCD6xLn4/AH0qyhx7PjB/nrd+JDLnvz66a4cMxGa3F3U/E2+fNWOFEWSOa4a1wNo2j4eLb928hjVPblDXgdq7k78OXOyhawYvpxysrOYPqgi8opxaBuiC2TfihKq20d/dpEsvk3uwXePVS96KiUxHG3QC0vJNH1QRUyJoLkupYUhl/0pjnbfDsX53DOvNu54nOJm27DdXZr51vWIwFmUE3p3jP6Oz6vO6OHYcaMxuLJNUo8XLc8n98E/fp5vPrg21WGoFLtnXi23XTj4xPN4mmUWTAidFIZUlca8vzatcpmUBl0m3TwjqW5XyJ8WjOH/Fk08Uda+KJ//d/kIfn3x0IT3H2gBjaWpJ9nNUOnK88n900NHeeyN95J6TJ1MI/1M6tuxSR/kS06rjnkf0fQ//vPC03n+m6m/tTwWHUsKXNv3PfOGM6SqlC6lrZqUj60pp21h02aMRHoYpWPf+UAPq3ZpOrx0xL+2iNwrIntFZH1Q2c9EZJOIvCkij4lIqVVeLSKHRGSt9fMbF2MPK1RvmVjfJHb7ufL+1fGEpJIo0drbLRcMsi0fWNmGXlYTwWVjuoXdR6rrAN3at+b2WYP58XnuDU0bqh0aoF1R07OnF689g/u+Oty1WJJt2sBO3HLBIK6e1CvVodiK5qv0PqD5PbcvAAOMMYOAt4EbgpZtM8YMsX6uciZM96zb+Ql9v/8sH352ONWhqDQyK4oudUO7lrofSIJmDqukdZp006sqK2R87w4xbVOQm76NCyLCrNqqtL3nIWJUxpgVwEfNyp43xhyznv4DqHQhtqRY+vd3OHS0gZXb9qU6FOWgUGdujh4jwvKSgsg3RiVTqwRvnU+27Cxhcr9OQGYOLZwoJ75yLgOeCXreTUTeEJGXRWRsqI1EZL6IrBaR1fX19QkHMeXnK6j78POE99Octq+rSEJ1vUvmEAV2fcHzg2qUS2YO5KlvpEdf7CUzB3LJ6FMirnf56d1O9H5Kxpe13ySU3EXku8Ax4A9W0W6gqzFmKHAtsExEbO8oMMYsNcbUGmNqy8vLEwkDgM17DnD3ineiiDnh
Q2W8/DQ9DXXC7bMG88jXTotq3UhvpQ4uXshsbuGEniceB5qLvj2194my2SO60r3cua6JiZg9oivdY+wXHmtqf3zBGC4e2TXGrfwl7sY4EZkHnANMMtbXqjHmMHDYerxGRLYBvYAkXYHUb3e3XXpaNYeONPDg6p2pDsUVM4fF0cLY7G1XlJ/DsitHOhNQHAJDyhblp1ezULBo7kdoskaMH+3BVaUYYNlrid84+MerRjs6c1uyxJXcRWQq8B3gDGPMwaDycuAjY0yDiHQHaoDI1WkXxNqcsnzVDttxqPV0sKmxNe15bsMHLcqXXTGSi3/3Wgoishft//+OOfH3xbY7RFF+Dv+4cVLSxxoJfpd64S07q7aKzR8c4NtT+jB88V9cGZvJqc/u8OqyuO54TrVoukIuB1YCvUVkl4hcDvwaKAZeaNblcRzwpoisAx4GrjLGfGS7YxcsX3WyNhntPzbw+Xx12z4WLHvdhaj8ZVyv8hbJo29FCaf1bG+/QZo7b3BnzhvcOaF9BN9gs/j8AS0S+91zT4173zUJ3OWZDk2QocbIL8jNZvH5A2kTZjCuYEM80DMp3USsXhhj5tgU3xNi3UeARxINyk1PrH2/yXMPVHLSRkFuFrk2N/qkOof89hJ3bvG//PRu3PPK9pDLo72tf3K/+O9STaQbYzrV4C8Nc1PZsitH8uA/d/LHNbuaLgj682anwzeVx/j3ylgInx0+1uT5lghtaZnaW+bUU9qGXNY8Zyz58kB3g4ngrDiT52/+LXyN2qmxUUSE8iSMUjmrtpI75gwNW2OfP667Y8e7fkrviOsEvmDCrVtbXcbPgoaOcEqmfnYDMi65N+fk2NN+Em7s6uAa4aafTGVQZSngbOJIVDTNclMHdAq7PJ7UEOqw4SZLd8otFwyO2MR047S+jh1vQVAPHTfMHRW5u6QKLeOTeySZekHVbq7IQDNEcBtzXpLmg3TawC6RR/KL9j8fzVvksjHVUe7NfcHdEH90br8URhLal4Z0prJt6FE6o5Gpn90Ab34yHZTZJ26hhR2bxfrM3Hrh4CZd2tLpbxnplPzPDkyuEMtZv1NNPN+a3Isbzu4Tdp1IOe1S64tm7qhTuHRMN26cFn5/dn4Y45dChreQpETGJ3cVvxafV599gNPx5SycWGNbbpfQo02o88f14OlvhLyZ3NakPtFd5/jBuf3Iy8kiPye2oQ+afzkHP61s2woVmSZ3Zevw0dD9jkNVDM8dlFiXwmSZ0DvxO6KDBSfWUIOJOdlCMKlvbINvNTeupvH1nz+sS1zb3/fV4XRtF12TyZwRXXn7P8+OeZTOdPxi9RpN7srWzKAP/tbFZwNw9ZlNa43Na4YDomjHTgezR0R3W3qkmm/w4sDIgE6PnW73ndCzQzF1S6bHvc/q9q2pWzKdYV1D94gKpW7J9JhHdkwV7S2T4SJVqDL1DTK5/8meJDnZWdQtmX5i+jKvX6hyKvzAl9mXhkauAafii8/pf1OnJI6V09z0JJ0VTuzjjS+uaGR8co+Uur2eyNwQ+It47Xsv1puJhkao2VaVFVK3ZHrELpWBdRN1V5SjTHrt/2Kr2Ws4o9fJprTAgGDB3XXPt/mCjeeze++lw6lbMj3mgc3SUcYndxW7wGfGzYmXE2U3cmWsH/VeHYt556fT2H7ztIjruvWXiGdoBD/UR8K9t752Rg+23zyNoqBhjm+fNTiq/1M4K284OQ/si9edkdC+0oFvk/vbez9LdQieMiyGsTu8UHPPzc5i839OpXObxqaEt//z7Lj2k5UljjXNjeoe++BT0faPt5tAOp3/P5GM62U/VlHXskJEWv5P7MpiFfyF4ofm2PSYf8sF3//T+sgr4e0PgJPsxowJxStNVfk52fzt+vEY03jBM9X/6s5tYu/CFy7JvHXTFPr94LlEQooxFnf337tjMZv3HODNH022ncVq00+m6uc1Br6tuavYxJKvT9bc0/+Tlp+TbXu3rdMC7e7huvxVlMZ3QXJs
jX0ttjDvZN0snZvIovX4wjEhEzs03jUda3/5WBTme2sawkgyPrlHSmpeSGBOOB5HdvfaXyYwGJobN8H87ILBrLpxUkxnQNEKJPdwF2XtmmViUdY6z7Z83Q8nn3js9v+7IDc7pnlnnf5CS7c5bxPl22YZp3ilCSJRMSV3j5o/rjtn9evoynRzeTlZEafVizcZXTm2O2f2jT7ueP6TndoU8MzVYzn7l39vUt6mlb8SXibJ+Jp7JF5PedHOd9q7U3GLsjXfO5N/3DDJ6ZBSRkTSZh7RWCQr7r4VJfzm30J3t0y3s9g0CyftaHKPYPenX6Q6hIREe8fk9VNaDh7VriifTm1abh9oAtAPV2xi/Xu1L4pvDPhE/i09wnyJ9O9sO9e9L8Q6PIIXRGyWEZF7aZwIe68xZoBVVgY8CFQDdcAsY8zH1rIbgMuBBuAbxpjkXc6PgyYoqG5XSFnrPB77j9No0yo34tlKOvVzX3Wjf84sgj1z9Vg6xDDBh5utaq3zsvn8SAO/mD3EvYPEwcl3X5UPByOLpuZ+HzC1Wdki4EVjTA3wovUcEekHzAb6W9vcKSL+ugTtMV1Ko3/TDu3alu7lRWFrbxCU3FOc26Np504nscTat6KEdnHW3J32t2+N588LT2/SOycdRGqqKi6IPt5uPrgjtbmIyd0YswJoPsn1DOD31uPfA18KKn/AGHPYGLMd2AqMcCZUFY9vTekVcTq5WCt9J5pl4ozJKak+fqy+EuWAZemmQ0kBAyvTb1C4X0Y4k+jZoTji2PfQOMrlL+cMtV0WyxdEuom3zb2jMWY3gPU7MNpOF2Bn0Hq7rLIWRGS+iKwWkdX19fVxhpE4v3cSycnKCjn2SbsQ3d8iSZeaexePnUpnRdmu+5drE7v13edv6ROKC3IjXpeIZgTL8b072HaD/H+Xj+C5a8bFHV+qOX1B1e7da/teM8YsNcbUGmNqy8udHV9bnRTugz4zzvG8A32iW6X4NP2BK0el9PjxKMyL3EoZz6xNdjVMr53ZxOOJhWNYOjf0mWkic9eOrSmncwzNmukm3uS+R0QqAKzfe63yXUBV0HqVwPvxh+e+VNc+3Rbop7/QZjLjeLu2ff+cftw0oz/jQtw5GaxXR/e68HmpvT3gmavH2o5gmKiLXWjyeew/TnN8n07rXNqqyfDUzWXC/RuhxJvcnwDmWY/nAY8Hlc8WkXwR6QbUAKsSC1ElIvDWDp5oIze7MakHcnus7//W+TlcMro6qi8HJ3rUnNk3tqF609kp7VozbWCF4/vNsbkzNp60FtimZ4eiiEMee0G4t+iZfTuw9gdnJS+YJIuY3EVkObAS6C0iu0TkcmAJcJaIbAHOsp5jjNkAPAS8BTwLLDDGNLgVvIos8N4Ovi1+5tBKa5nzpy33X9b0+rnfz4zi4fafxIn9++Xf1q8idN/8380bTmlhfNedvCBio6kxZk6IRbYdjI0xi4HFiQSlnPGNSTUMqSptUR7o7RK4vpfouCTBxvWK//pJbrZwtCFzT6OV80SEC06t5OE1u1IdStJ5t5+PQ97e499x3689q5dtebr0dmkug5tH4/b4gjGs2t60p7L+GZuqapv4LFhelNHJ/dVtH6Y6hJQ4MWRvEk6+0208Er8ZXFXKYJuzM/BP00qiGjK01pDRY8tc/NvXUh1CSiSz5u7EIa46o7sDe0kfY3u150tDTk6fN6u28sTjcwY5f7E1Fj3Ki7iotoo7o5yv1QuONRxPdQgpkdHJPVOdHPirMfW6WbERiT5h/e8VI23LAxe9Ktu2YpIPZqfPz8nmF7OHMmdEV/7n0uFNhnu4elJNmC3dl50l/NcFg6jp2HKUUK86qsldZYpvTKxhcFUpk/u538VQJPo24FHd24VdnpeTxT2XDk88qDRx88yBTOjTgS+fWhl5ZRW3TL1Ir8k9A1W3b83jC8bQu1Mx/SpKuHnmQNeOJcSQ3UPy94ezfVF+XHelquhcelo11e1OXlQta53Hj8/rn8KI
kiOjL6hmutzsLJ6+eqyrx2isuTuTnAPt91P7d6Jbuf9G8QNnvsa+Nbk3X1/+Bn3C9PHOJNXtW/PS9ROoXvQUAK9/3783LgXTmrtPjO+dPuPzfGWkO6MfBhLfb+aeynemRh7tz0ucvLY9ukc7Vn/vTIryte6WyTS5+9R1Nn3czxlUwYjqMtePvfj8k808XzujR0zb1mjzhFKO0OTuU3an97++eBgPXTU6KccP3P16Vr+Otr1xbp81mD4287a+cO0ZzBt9iu0+td+2UtHT5O4zA7qkVzuriNgmdxG4bEy3E88vdqkpxysuO73xb2E3Z61S8dBGOeWKq87owZ0vbUOALw3tzLMbPmixzqzhVcwaXtWifHL/Tvx+5btJiDJ9zBnRlTkenalJpSetuStXfHtqH+qWTCcrS5g6oIK6JdObLA839MGYnu1brK9UosZGMf+An2jN3Se8NnxGLEMfBKZSmzHE+UkuVOa4/7IRJDAxk+docveZ2lPKWP/efob5YKKFgNLCPDbeNJWCXD3RVPETEbIz6Kq8JnefOaNXOdecWZP2kxAEj6cSjVZRzD2qlDop7qqQiPQWkbVBP/tF5BoR+ZGIvBdUPs3JgFVLTaYKE9I6sedlZ/HajZMY0KVNqkNRytfirrkbYzYDQwBEJBt4D3gM+Crwc2PMrU4EqMJ75TsTKC3MY1jXtrz8dj0VadyV7vXvn0VOtlBSkJvqUJTyPaeaZSYB24wx7+rkDMlV0qoxUX59Yk+mD+pEzw7pO1RrWevYzyjycrSdXal4OJXcZwPLg54vFJFLgNXAdcaYjx06jmomN6sx+WVlSVon9nj8/dsTaK3joygVl4SrRSKSB5wH/NEqugvoQWOTzW7gthDbzReR1SKyur6+PtEwMtIjXxvt6wuNVWWFcdX2lVLO3MR0NvC6MWYPgDFmjzGmwRhzHPgtMMJuI2PMUmNMrTGmtrw8fUY09JL+nfWipFLKnhPJfQ5BTTIiEjyn2vnAegeOYevgkWNu7doTCnL9W2tXSiUmoQZNESkEzgL+Paj4FhEZQuPAhHXNljlq32dH3Nq1Ukp5WkLJ3RhzEGjXrGxuQhHFdPxkHSn9/PaS2lSHoJRKY9rPzKPaFmpfcaVUaJrcPSonW/91SqnQNEN40Hen9WVwpfaUUUqFpneIeNCV47qnOgSlVJrzdM3d2M4U6m/t9KYepVQUvJ3cMy+3M3VAp1SHoJTyAE8n90wSGI8tA7/PlFJx0OTuEYGxNjPxbEUpFTtN7h6hQykrpWLh6eTuhUrsjCGdY1r/7rmnnnj84/P6n3h89oBO9OlUzL9rTxmlVBQ8ndy94Jezh4Zclp3VsjY+pf/JC6YmqA2mbWEez14zjur2rZ0NUCnlS5rcU2h8r+iHOv7KqK4uRqKU8htN7kk0tX9s3Rgn9e0IwJNfP50+nUrcCEkp5VOa3JPo3MFN29/njj4l7PpVZYXULZnOgC461IBSKjY6/EAS9et8svZdt2Q6xxqOpzAapZSfebrmbjzW6bu8OB842QtGuzcqpdzi6Zq7t1I7FOXnULdkeqrDUEplgESn2asDDgANwDFjTK2IlAEPAtU0TrM3yxjzcWJhZoa5o8K3wSulVLScaJaZYIwZYowJzPu2CHjRGFMDvGg9d4XHWmXC2rL4bG6a0dhcM6JbWYqjUUp5nRvNMjOA8dbj3wMvAd9x4Tike8NM894xzQVa3GcO60Ju0MxKy64YybHj6f3alFLpLdHkboDnRcQAdxtjlgIdjTG7AYwxu0Wkg92GIjIfmA/QtWt8N+ike819bM/2YZdnZQnrfjCZ1vnZTcpzsrPIyQ6xkVJKRSHR5D7GGPO+lcBfEJFN0W5ofREsBaitrY0rTadzbn910UQq2hREXK+NTnStlHJBQm3uxpj3rd97gceAEcAeEakAsH7vTTTI0Md3a8+J61zaSrs6KqVSJu7kLiKtRaQ48BiYDKwHngDmWavNAx5PNMhQMnGaPaWUikYi
zTIdgces2mkOsMwY86yI/BN4SEQuB3YAFyYepr10rrkrpVQqxZ3cjTHvAINtyvcBkxIJKvoYknGU8F745jgOHD7GzDtfBWDF9RPIsjkf6liSn+TIlFKZzON3qKYmu4uc/GKp6VjMjn0HTyzr2q6wxfpPLBxD59JWyQpPKaU8ntzToOYerKrMPoEPqixNbiBKqYzn6YHD3LTsipHc99XhUa2rF3aVUunG0zX34y5W3U9rdgPSPfNqqWxbyCcHj3DR0n80WRa4u7RDceR+7UoplQyeTu7JbJYJzIpkp3NpK269cDDje0c/bZ5SSrnJ28ndpf0W5IZvrcrLyeLIseO0yj05RsAFp1a6FI1SSsXO023uZYV5rux3+ZWjwi6fYQ0I9sNz+7lyfKWUSpSna+523Q6dUFV2cr8/mdGf2uqmQ/B+75x+dCjJ19q6UipteTq5J8Pc0dUtytq0yuX6KX2SH4xSSkXJ080yicjJ0kG9lFL+lbHJPVi71u603SulVKr4Nrl3Kgnf51xvO1JK+Zlvk/ufFozh4pHRzfB064Utxj9TSilP821y79SmgJ+ePzDk8lHdT/aAGVJV2mSZtsYrpbzOt8k9YFwv+7tGF03tm+RIlFIqeXyf3O+/bIRt+cDKNiceFxdoj1CllL/4Prk316+ihAub3XyUk53FdWf1SlFESinlvETmUK0Skb+JyEYR2SAiV1vlPxKR90RkrfUzzblwE/fof5zGz2wuoH59Ug1l2iVSKeUTibRHHAOuM8a8bk2UvUZEXrCW/dwYc2vi4SmllIpH3DV3Y8xuY8zr1uMDwEagi1OBpcLlp3cDoHW+tsErpbzNkTZ3EakGhgKvWUULReRNEblXRNqG2Ga+iKwWkdX19fVOhHHCzGGhv2MkTD/HBRN6UrdkOgVBQ/kqpZQXJZzcRaQIeAS4xhizH7gL6AEMAXYDt9ltZ4xZaoypNcbUlpc7O8nFbXpTklIqwyWU3EUkl8bE/gdjzKMAxpg9xpgGY8xx4LeAfV9El3QqKUDCVM+l2S1KkYYpUEopL4q7cVkaM+g9wEZjzO1B5RXGmN3W0/OB9YmFGL075gxl+sCKsOsE5/2NN00lK+M6gyqlMkEiqW0MMBeY2Kzb4y0i8i8ReROYAHzTiUCj0blNAdk2Q/lu+slU2/Vb5WWTn6Pt60op/4m75m6MeQX7YViejj+cxIRqjSnIzSZL4LjRcWOUUpnBZ40SmrqVUgp8l9xDC4zfHu5iq1JK+YWvkns0eVtTu1IqE/gquZcU5IZcZnTqJaVUBvFVcu/ZoSjkst4di5MYiVJKpZbnk/vyK0cBjd0gw1l25Uj+cMVIsmy6SiqllN94foSsru0Ko1qvXVE+Y3rmuxyNUkqlB8/X3I02piulVAueT+4B2sVRKaVO8k1yV0opdZLnk3uWVWPPz/X8S1FKKcd4/oJqRZsCvjW5F+cN9vQkUEop5SjPJ3cRYeHEmlSHoZRSaUXbMpRSyoc0uSullA9pcldKKR/S5K6UUj7kWnIXkakisllEtorIIreOo5RSqiVXkruIZAP/DZwN9APmiEg/N46llFKqJbdq7iOArcaYd4wxR4AHgBkuHUsppVQzbiX3LsDOoOe7rLITRGS+iKwWkdX19fUuhaGUUpnJrZuY7EbxajJ8ozFmKbAUQETqReTdBI7XHvgwge1Tycuxg8afahp/6qRD7KeEWuBWct8FVAU9rwTeD7WyMaY8kYOJyGpjTG0i+0gVL8cOGn+qafypk+6xu9Us80+gRkS6iUgeMBt4wqVjKaWUasaVmrsx5piILASeA7KBe40xG9w4llJKqZZcGzjMGPM08LRb+29maZKO4wYvxw4af6pp/KmT1rGLTlOnlFL+o8MPKKWUD2lyV0opH/J0ck/X8WtE5F4R2Ssi64PKykTkBRHZYv1uG7TsBus1bBaRKUHlp4rIv6xld0gSZgEXkSoR+ZuIbBSRDSJytcfiLxCRVSKyzor/x16KP+jY
2SLyhog86bX4RaTOOu5aEVntpfhFpFREHhaRTdZnYLRXYm/BGOPJHxp74WwDugN5wDqgX6rjsmIbBwwD1geV3QIssh4vAv7LetzPij0f6Ga9pmxr2SpgNI03hT0DnJ2E2CuAYdbjYuBtK0avxC9AkfU4F3gNGOWV+INex7XAMuBJL71/rOPWAe2blXkifuD3wBXW4zyg1Cuxt3gtyT6gg/+E0cBzQc9vAG5IdVxB8VTTNLlvBiqsxxXAZru4aew+OtpaZ1NQ+Rzg7hS8jseBs7wYP1AIvA6M9FL8NN709yIwkZPJ3Uvx19Eyuad9/EAJsB2ro4mXYrf78XKzTMTxa9JMR2PMbgDrdwerPNTr6GI9bl6eNCJSDQylsfbrmfitJo21wF7gBWOMp+IHfgF8GzgeVOal+A3wvIisEZH5VpkX4u8O1AP/YzWJ/U5EWnsk9ha8nNwjjl/jEaFeR0pfn4gUAY8A1xhj9odb1aYspfEbYxqMMUNorAGPEJEBYVZPq/hF5BxgrzFmTbSb2JSl+v0zxhgzjMYhvxeIyLgw66ZT/Dk0NqfeZYwZCnxOYzNMKOkUewteTu4xjV+TBvaISAWA9XuvVR7qdeyyHjcvd52I5NKY2P9gjHnUKvZM/AHGmE+Al4CpeCf+McB5IlJH41DZE0Xkf/FO/Bhj3rd+7wUeo3EIcC/EvwvYZZ3pATxMY7L3QuwteDm5e238mieAedbjeTS2ZQfKZ4tIvoh0A2qAVdbp3wERGWVdab8kaBvXWMe6B9hojLndg/GXi0ip9bgVcCawySvxG2NuMMZUGmOqaXxP/9UY829eiV9EWotIceAxMBlY74X4jTEfADtFpLdVNAl4ywux20p2I7/DF0Cm0dibYxvw3VTHExTXcmA3cJTGb/HLgXY0XiTbYv0uC1r/u9Zr2EzQVXWglsYPxjbg1zS70ONS7KfTeAr5JrDW+pnmofgHAW9Y8a8HfmCVeyL+Zq9lPCcvqHoifhrbrddZPxsCn0sPxT8EWG29f/4EtPVK7M1/dPgBpZTyIS83yyillApBk7tSSvmQJnellPIhTe5KKeVDmtyVUsqHNLkrpZQPaXJXSikf+v8y39m5wTga6gAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    play_episode(env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-20:]) > 199:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
